hc
2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/include/linux/fs.h
....@@ -24,7 +24,6 @@
2424 #include <linux/capability.h>
2525 #include <linux/semaphore.h>
2626 #include <linux/fcntl.h>
27
-#include <linux/fiemap.h>
2827 #include <linux/rculist_bl.h>
2928 #include <linux/atomic.h>
3029 #include <linux/shrinker.h>
....@@ -37,15 +36,20 @@
3736 #include <linux/uuid.h>
3837 #include <linux/errseq.h>
3938 #include <linux/ioprio.h>
39
+#include <linux/fs_types.h>
40
+#include <linux/build_bug.h>
41
+#include <linux/stddef.h>
4042 #include <linux/android_kabi.h>
4143
4244 #include <asm/byteorder.h>
4345 #include <uapi/linux/fs.h>
46
+#include <linux/android_vendor.h>
4447
4548 struct backing_dev_info;
4649 struct bdi_writeback;
4750 struct bio;
4851 struct export_operations;
52
+struct fiemap_extent_info;
4953 struct hd_geometry;
5054 struct iovec;
5155 struct kiocb;
....@@ -64,6 +68,8 @@
6468 struct fscrypt_operations;
6569 struct fsverity_info;
6670 struct fsverity_operations;
71
+struct fs_context;
72
+struct fs_parameter_spec;
6773
6874 extern void __init inode_init(void);
6975 extern void __init inode_init_early(void);
....@@ -163,18 +169,16 @@
163169 #define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
164170
165171 /* File is capable of returning -EAGAIN if I/O will block */
166
-#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
172
+#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
173
+
174
+/* File represents mount that needs unmounting */
175
+#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000)
167176
168177 /* File does not contribute to nr_files count */
169
-#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
178
+#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
170179
171
-/*
172
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
173
- * that indicates that they should check the contents of the iovec are
174
- * valid, but not check the memory that the iovec elements
175
- * points too.
176
- */
177
-#define CHECK_IOVEC_ONLY -1
180
+/* File supports async buffered reads */
181
+#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
178182
179183 /*
180184 * Attribute flags. These should be or-ed together to figure out what
....@@ -285,6 +289,7 @@
285289 struct page;
286290 struct address_space;
287291 struct writeback_control;
292
+struct readahead_control;
288293
289294 /*
290295 * Write life time hint values.
....@@ -299,14 +304,20 @@
299304 WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
300305 };
301306
302
-#define IOCB_EVENTFD (1 << 0)
303
-#define IOCB_APPEND (1 << 1)
304
-#define IOCB_DIRECT (1 << 2)
305
-#define IOCB_HIPRI (1 << 3)
306
-#define IOCB_DSYNC (1 << 4)
307
-#define IOCB_SYNC (1 << 5)
308
-#define IOCB_WRITE (1 << 6)
309
-#define IOCB_NOWAIT (1 << 7)
307
+/* Match RWF_* bits to IOCB bits */
308
+#define IOCB_HIPRI (__force int) RWF_HIPRI
309
+#define IOCB_DSYNC (__force int) RWF_DSYNC
310
+#define IOCB_SYNC (__force int) RWF_SYNC
311
+#define IOCB_NOWAIT (__force int) RWF_NOWAIT
312
+#define IOCB_APPEND (__force int) RWF_APPEND
313
+
314
+/* non-RWF related bits - start at 16 */
315
+#define IOCB_EVENTFD (1 << 16)
316
+#define IOCB_DIRECT (1 << 17)
317
+#define IOCB_WRITE (1 << 18)
318
+/* iocb->ki_waitq is valid */
319
+#define IOCB_WAITQ (1 << 19)
320
+#define IOCB_NOIO (1 << 20)
310321
311322 struct kiocb {
312323 struct file *ki_filp;
....@@ -320,6 +331,10 @@
320331 int ki_flags;
321332 u16 ki_hint;
322333 u16 ki_ioprio; /* See linux/ioprio.h */
334
+ union {
335
+ unsigned int ki_cookie; /* for ->iopoll */
336
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
337
+ };
323338
324339 randomized_struct_fields_end
325340 };
....@@ -367,6 +382,7 @@
367382 */
368383 int (*readpages)(struct file *filp, struct address_space *mapping,
369384 struct list_head *pages, unsigned nr_pages);
385
+ void (*readahead)(struct readahead_control *);
370386
371387 int (*write_begin)(struct file *, struct address_space *mapping,
372388 loff_t pos, unsigned len, unsigned flags,
....@@ -420,24 +436,45 @@
420436 loff_t pos, unsigned len, unsigned copied,
421437 struct page *page, void *fsdata);
422438
439
+/**
440
+ * struct address_space - Contents of a cacheable, mappable object.
441
+ * @host: Owner, either the inode or the block_device.
442
+ * @i_pages: Cached pages.
443
+ * @gfp_mask: Memory allocation flags to use for allocating pages.
444
+ * @i_mmap_writable: Number of VM_SHARED mappings.
445
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
446
+ * @i_mmap: Tree of private and shared mappings.
447
+ * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
448
+ * @nrpages: Number of page entries, protected by the i_pages lock.
449
+ * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
450
+ * @writeback_index: Writeback starts here.
451
+ * @a_ops: Methods.
452
+ * @flags: Error bits and flags (AS_*).
453
+ * @wb_err: The most recent error which has occurred.
454
+ * @private_lock: For use by the owner of the address_space.
455
+ * @private_list: For use by the owner of the address_space.
456
+ * @private_data: For use by the owner of the address_space.
457
+ */
423458 struct address_space {
424
- struct inode *host; /* owner: inode, block_device */
425
- struct radix_tree_root i_pages; /* cached pages */
426
- atomic_t i_mmap_writable;/* count VM_SHARED mappings */
427
- struct rb_root_cached i_mmap; /* tree of private and shared mappings */
428
- struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
429
- /* Protected by the i_pages lock */
430
- unsigned long nrpages; /* number of total pages */
431
- /* number of shadow or DAX exceptional entries */
459
+ struct inode *host;
460
+ struct xarray i_pages;
461
+ gfp_t gfp_mask;
462
+ atomic_t i_mmap_writable;
463
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
464
+ /* number of thp, only for non-shmem files */
465
+ atomic_t nr_thps;
466
+#endif
467
+ struct rb_root_cached i_mmap;
468
+ struct rw_semaphore i_mmap_rwsem;
469
+ unsigned long nrpages;
432470 unsigned long nrexceptional;
433
- pgoff_t writeback_index;/* writeback starts here */
434
- const struct address_space_operations *a_ops; /* methods */
435
- unsigned long flags; /* error bits */
436
- spinlock_t private_lock; /* for use by the address_space */
437
- gfp_t gfp_mask; /* implicit gfp mask for allocations */
438
- struct list_head private_list; /* for use by the address_space */
439
- void *private_data; /* ditto */
471
+ pgoff_t writeback_index;
472
+ const struct address_space_operations *a_ops;
473
+ unsigned long flags;
440474 errseq_t wb_err;
475
+ spinlock_t private_lock;
476
+ struct list_head private_list;
477
+ void *private_data;
441478
442479 ANDROID_KABI_RESERVE(1);
443480 ANDROID_KABI_RESERVE(2);
....@@ -449,69 +486,38 @@
449486 * must be enforced here for CRIS, to let the least significant bit
450487 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
451488 */
452
-struct request_queue;
453489
454
-struct block_device {
455
- dev_t bd_dev; /* not a kdev_t - it's a search key */
456
- int bd_openers;
457
- struct inode * bd_inode; /* will die */
458
- struct super_block * bd_super;
459
- struct mutex bd_mutex; /* open/close mutex */
460
- void * bd_claiming;
461
- void * bd_holder;
462
- int bd_holders;
463
- bool bd_write_holder;
464
-#ifdef CONFIG_SYSFS
465
- struct list_head bd_holder_disks;
466
-#endif
467
- struct block_device * bd_contains;
468
- unsigned bd_block_size;
469
- u8 bd_partno;
470
- struct hd_struct * bd_part;
471
- /* number of times partitions within this device have been opened. */
472
- unsigned bd_part_count;
473
- int bd_invalidated;
474
- struct gendisk * bd_disk;
475
- struct request_queue * bd_queue;
476
- struct backing_dev_info *bd_bdi;
477
- struct list_head bd_list;
478
- /*
479
- * Private data. You must have bd_claim'ed the block_device
480
- * to use this. NOTE: bd_claim allows an owner to claim
481
- * the same device multiple times, the owner must take special
482
- * care to not mess up bd_private for that case.
483
- */
484
- unsigned long bd_private;
485
-
486
- /* The counter of freeze processes */
487
- int bd_fsfreeze_count;
488
- /* Mutex for freeze */
489
- struct mutex bd_fsfreeze_mutex;
490
-
491
- ANDROID_KABI_RESERVE(1);
492
- ANDROID_KABI_RESERVE(2);
493
- ANDROID_KABI_RESERVE(3);
494
- ANDROID_KABI_RESERVE(4);
495
-} __randomize_layout;
490
+/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
491
+#define PAGECACHE_TAG_DIRTY XA_MARK_0
492
+#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
493
+#define PAGECACHE_TAG_TOWRITE XA_MARK_2
496494
497495 /*
498
- * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
499
- * radix trees
496
+ * Returns true if any of the pages in the mapping are marked with the tag.
500497 */
501
-#define PAGECACHE_TAG_DIRTY 0
502
-#define PAGECACHE_TAG_WRITEBACK 1
503
-#define PAGECACHE_TAG_TOWRITE 2
504
-
505
-int mapping_tagged(struct address_space *mapping, int tag);
498
+static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
499
+{
500
+ return xa_marked(&mapping->i_pages, tag);
501
+}
506502
507503 static inline void i_mmap_lock_write(struct address_space *mapping)
508504 {
509505 down_write(&mapping->i_mmap_rwsem);
510506 }
511507
508
+static inline int i_mmap_trylock_write(struct address_space *mapping)
509
+{
510
+ return down_write_trylock(&mapping->i_mmap_rwsem);
511
+}
512
+
512513 static inline void i_mmap_unlock_write(struct address_space *mapping)
513514 {
514515 up_write(&mapping->i_mmap_rwsem);
516
+}
517
+
518
+static inline int i_mmap_trylock_read(struct address_space *mapping)
519
+{
520
+ return down_read_trylock(&mapping->i_mmap_rwsem);
515521 }
516522
517523 static inline void i_mmap_lock_read(struct address_space *mapping)
....@@ -524,6 +530,16 @@
524530 up_read(&mapping->i_mmap_rwsem);
525531 }
526532
533
+static inline void i_mmap_assert_locked(struct address_space *mapping)
534
+{
535
+ lockdep_assert_held(&mapping->i_mmap_rwsem);
536
+}
537
+
538
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
539
+{
540
+ lockdep_assert_held_write(&mapping->i_mmap_rwsem);
541
+}
542
+
527543 /*
528544 * Might pages of this file be mapped into userspace?
529545 */
....@@ -534,7 +550,7 @@
534550
535551 /*
536552 * Might pages of this file have been modified in userspace?
537
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
553
+ * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
538554 * marks vma as VM_SHARED if it is shared, and the file was opened for
539555 * writing i.e. vma may be mprotected writable even if now readonly.
540556 *
....@@ -685,10 +701,13 @@
685701 atomic_t i_count;
686702 atomic_t i_dio_count;
687703 atomic_t i_writecount;
688
-#ifdef CONFIG_IMA
704
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
689705 atomic_t i_readcount; /* struct files open RO */
690706 #endif
691
- const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
707
+ union {
708
+ const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
709
+ void (*free_inode)(struct inode *);
710
+ };
692711 struct file_lock_context *i_flctx;
693712 struct address_space i_data;
694713 struct list_head i_devices;
....@@ -697,7 +716,7 @@
697716 struct block_device *i_bdev;
698717 struct cdev *i_cdev;
699718 char *i_link;
700
- unsigned __i_dir_seq;
719
+ unsigned i_dir_seq;
701720 };
702721
703722 __u32 i_generation;
....@@ -720,6 +739,8 @@
720739 ANDROID_KABI_RESERVE(1);
721740 ANDROID_KABI_RESERVE(2);
722741 } __randomize_layout;
742
+
743
+struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
723744
724745 static inline unsigned int i_blocksize(const struct inode *node)
725746 {
....@@ -837,7 +858,7 @@
837858 i_size = inode->i_size;
838859 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
839860 return i_size;
840
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
861
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
841862 loff_t i_size;
842863
843864 preempt_disable();
....@@ -862,7 +883,7 @@
862883 inode->i_size = i_size;
863884 write_seqcount_end(&inode->i_size_seqcount);
864885 preempt_enable();
865
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
886
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
866887 preempt_disable();
867888 inode->i_size = i_size;
868889 preempt_enable();
....@@ -880,8 +901,6 @@
880901 {
881902 return MAJOR(inode->i_rdev);
882903 }
883
-
884
-extern struct block_device *I_BDEV(struct inode *inode);
885904
886905 struct fown_struct {
887906 rwlock_t lock; /* protects pid, uid, euid fields */
....@@ -952,6 +971,11 @@
952971 #endif /* #ifdef CONFIG_EPOLL */
953972 struct address_space *f_mapping;
954973 errseq_t f_wb_err;
974
+ errseq_t f_sb_err; /* for syncfs */
975
+
976
+ ANDROID_KABI_RESERVE(1);
977
+ ANDROID_KABI_RESERVE(2);
978
+ ANDROID_OEM_DATA(1);
955979 } __randomize_layout
956980 __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
957981
....@@ -970,7 +994,6 @@
970994 #define get_file_rcu_many(x, cnt) \
971995 atomic_long_add_unless(&(x)->f_count, (cnt), 0)
972996 #define get_file_rcu(x) get_file_rcu_many((x), 1)
973
-#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
974997 #define file_count(x) atomic_long_read(&(x)->f_count)
975998
976999 #define MAX_NON_LFS ((1UL<<31) - 1)
....@@ -1018,8 +1041,6 @@
10181041 };
10191042
10201043 struct lock_manager_operations {
1021
- int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
1022
- unsigned long (*lm_owner_key)(struct file_lock *);
10231044 fl_owner_t (*lm_get_owner)(fl_owner_t);
10241045 void (*lm_put_owner)(fl_owner_t);
10251046 void (*lm_notify)(struct file_lock *); /* unblock callback */
....@@ -1027,6 +1048,7 @@
10271048 bool (*lm_break)(struct file_lock *);
10281049 int (*lm_change)(struct file_lock *, int, struct list_head *);
10291050 void (*lm_setup)(struct file_lock *, void **);
1051
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
10301052
10311053 ANDROID_KABI_RESERVE(1);
10321054 ANDROID_KABI_RESERVE(2);
....@@ -1068,10 +1090,15 @@
10681090 * Obviously, the last two criteria only matter for POSIX locks.
10691091 */
10701092 struct file_lock {
1071
- struct file_lock *fl_next; /* singly linked list for this inode */
1093
+ struct file_lock *fl_blocker; /* The lock, that is blocking us */
10721094 struct list_head fl_list; /* link into file_lock_context */
10731095 struct hlist_node fl_link; /* node in global lists */
1074
- struct list_head fl_block; /* circular list of blocked processes */
1096
+ struct list_head fl_blocked_requests; /* list of requests with
1097
+ * ->fl_blocker pointing here
1098
+ */
1099
+ struct list_head fl_blocked_member; /* node in
1100
+ * ->fl_blocker->fl_blocked_requests
1101
+ */
10751102 fl_owner_t fl_owner;
10761103 unsigned int fl_flags;
10771104 unsigned char fl_type;
....@@ -1095,6 +1122,7 @@
10951122 struct {
10961123 struct list_head link; /* link in AFS vnode's pending_locks list */
10971124 int state; /* state of grant or error if -ve */
1125
+ unsigned int debug_id;
10981126 } afs;
10991127 } fl_u;
11001128
....@@ -1147,16 +1175,22 @@
11471175 extern void locks_release_private(struct file_lock *);
11481176 extern void posix_test_lock(struct file *, struct file_lock *);
11491177 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
1150
-extern int posix_unblock_lock(struct file_lock *);
1178
+extern int locks_delete_block(struct file_lock *);
11511179 extern int vfs_test_lock(struct file *, struct file_lock *);
11521180 extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
11531181 extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
1182
+bool vfs_inode_has_locks(struct inode *inode);
11541183 extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
11551184 extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
11561185 extern void lease_get_mtime(struct inode *, struct timespec64 *time);
11571186 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
11581187 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
11591188 extern int lease_modify(struct file_lock *, int, struct list_head *);
1189
+
1190
+struct notifier_block;
1191
+extern int lease_register_notifier(struct notifier_block *);
1192
+extern void lease_unregister_notifier(struct notifier_block *);
1193
+
11601194 struct files_struct;
11611195 extern void show_fd_locks(struct seq_file *f,
11621196 struct file *filp, struct files_struct *files);
....@@ -1237,7 +1271,7 @@
12371271 return -ENOLCK;
12381272 }
12391273
1240
-static inline int posix_unblock_lock(struct file_lock *waiter)
1274
+static inline int locks_delete_block(struct file_lock *waiter)
12411275 {
12421276 return -ENOENT;
12431277 }
....@@ -1256,6 +1290,11 @@
12561290 static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
12571291 {
12581292 return 0;
1293
+}
1294
+
1295
+static inline bool vfs_inode_has_locks(struct inode *inode)
1296
+{
1297
+ return false;
12591298 }
12601299
12611300 static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
....@@ -1343,32 +1382,34 @@
13431382 * sb->s_flags. Note that these mirror the equivalent MS_* flags where
13441383 * represented in both.
13451384 */
1346
-#define SB_RDONLY 1 /* Mount read-only */
1347
-#define SB_NOSUID 2 /* Ignore suid and sgid bits */
1348
-#define SB_NODEV 4 /* Disallow access to device special files */
1349
-#define SB_NOEXEC 8 /* Disallow program execution */
1350
-#define SB_SYNCHRONOUS 16 /* Writes are synced at once */
1351
-#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */
1352
-#define SB_DIRSYNC 128 /* Directory modifications are synchronous */
1353
-#define SB_NOATIME 1024 /* Do not update access times. */
1354
-#define SB_NODIRATIME 2048 /* Do not update directory access times */
1355
-#define SB_SILENT 32768
1356
-#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
1357
-#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
1358
-#define SB_I_VERSION (1<<23) /* Update inode I_version field */
1359
-#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
1385
+#define SB_RDONLY BIT(0) /* Mount read-only */
1386
+#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */
1387
+#define SB_NODEV BIT(2) /* Disallow access to device special files */
1388
+#define SB_NOEXEC BIT(3) /* Disallow program execution */
1389
+#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */
1390
+#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */
1391
+#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */
1392
+#define SB_NOATIME BIT(10) /* Do not update access times. */
1393
+#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
1394
+#define SB_SILENT BIT(15)
1395
+#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */
1396
+#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
1397
+#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
1398
+#define SB_I_VERSION BIT(23) /* Update inode I_version field */
1399
+#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
13601400
13611401 /* These sb flags are internal to the kernel */
1362
-#define SB_SUBMOUNT (1<<26)
1363
-#define SB_NOSEC (1<<28)
1364
-#define SB_BORN (1<<29)
1365
-#define SB_ACTIVE (1<<30)
1366
-#define SB_NOUSER (1<<31)
1402
+#define SB_SUBMOUNT BIT(26)
1403
+#define SB_FORCE BIT(27)
1404
+#define SB_NOSEC BIT(28)
1405
+#define SB_BORN BIT(29)
1406
+#define SB_ACTIVE BIT(30)
1407
+#define SB_NOUSER BIT(31)
13671408
13681409 /* These flags relate to encoding and casefolding */
13691410 #define SB_ENC_STRICT_MODE_FL (1 << 0)
13701411
1371
-#define sb_has_enc_strict_mode(sb) \
1412
+#define sb_has_strict_encoding(sb) \
13721413 (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
13731414
13741415 /*
....@@ -1385,12 +1426,14 @@
13851426 #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
13861427 #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
13871428 #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
1388
-#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */
1429
+#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */
13891430
13901431 /* sb->s_iflags to limit user namespace mounts */
13911432 #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
13921433 #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
13931434 #define SB_I_UNTRUSTED_MOUNTER 0x00000040
1435
+
1436
+#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
13941437
13951438 /* Possible states of 'frozen' field */
13961439 enum {
....@@ -1434,7 +1477,16 @@
14341477 const struct xattr_handler **s_xattr;
14351478 #ifdef CONFIG_FS_ENCRYPTION
14361479 const struct fscrypt_operations *s_cop;
1480
+#ifdef __GENKSYMS__
1481
+ /*
1482
+ * Android ABI CRC preservation due to commit 391cceee6d43 ("fscrypt:
1483
+ * stop using keyrings subsystem for fscrypt_master_key") changing this
1484
+ * type. Size is the same, this is a private field.
1485
+ */
14371486 struct key *s_master_keys; /* master crypto keys in use */
1487
+#else
1488
+ struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
1489
+#endif
14381490 #endif
14391491 #ifdef CONFIG_FS_VERITY
14401492 const struct fsverity_operations *s_vop;
....@@ -1454,16 +1506,28 @@
14541506
14551507 struct sb_writers s_writers;
14561508
1509
+ /*
1510
+ * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
1511
+ * s_fsnotify_marks together for cache efficiency. They are frequently
1512
+ * accessed and rarely modified.
1513
+ */
1514
+ void *s_fs_info; /* Filesystem private info */
1515
+
1516
+ /* Granularity of c/m/atime in ns (cannot be worse than a second) */
1517
+ u32 s_time_gran;
1518
+ /* Time limits for c/m/atime in seconds */
1519
+ time64_t s_time_min;
1520
+ time64_t s_time_max;
1521
+#ifdef CONFIG_FSNOTIFY
1522
+ __u32 s_fsnotify_mask;
1523
+ struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
1524
+#endif
1525
+
14571526 char s_id[32]; /* Informational name */
14581527 uuid_t s_uuid; /* UUID */
14591528
1460
- void *s_fs_info; /* Filesystem private info */
14611529 unsigned int s_max_links;
14621530 fmode_t s_mode;
1463
-
1464
- /* Granularity of c/m/atime in ns.
1465
- Cannot be worse than a second */
1466
- u32 s_time_gran;
14671531
14681532 /*
14691533 * The next field is for VFS *only*. No filesystems have any business
....@@ -1475,7 +1539,7 @@
14751539 * Filesystem subtype. If non-empty the filesystem type field
14761540 * in /proc/mounts will be "type.subtype"
14771541 */
1478
- char *s_subtype;
1542
+ const char *s_subtype;
14791543
14801544 const struct dentry_operations *s_d_op; /* default d_op for dentries */
14811545
....@@ -1495,6 +1559,9 @@
14951559 /* Being remounted read-only */
14961560 int s_readonly_remount;
14971561
1562
+ /* per-sb errseq_t for reporting writeback errors via syncfs */
1563
+ errseq_t s_wb_err;
1564
+
14981565 /* AIO completions deferred from interrupt context */
14991566 struct workqueue_struct *s_dio_done_wq;
15001567 struct hlist_head s_pins;
....@@ -1507,11 +1574,12 @@
15071574 struct user_namespace *s_user_ns;
15081575
15091576 /*
1510
- * Keep the lru lists last in the structure so they always sit on their
1511
- * own individual cachelines.
1577
+ * The list_lru structure is essentially just a pointer to a table
1578
+ * of per-node lru lists, each of which has its own spinlock.
1579
+ * There is no need to put them into separate cachelines.
15121580 */
1513
- struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
1514
- struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
1581
+ struct list_lru s_dentry_lru;
1582
+ struct list_lru s_inode_lru;
15151583 struct rcu_head rcu;
15161584 struct work_struct destroy_work;
15171585
....@@ -1560,15 +1628,30 @@
15601628 inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
15611629 }
15621630
1563
-extern struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran);
15641631 extern struct timespec64 current_time(struct inode *inode);
15651632
15661633 /*
15671634 * Snapshotting support.
15681635 */
15691636
1570
-void __sb_end_write(struct super_block *sb, int level);
1571
-int __sb_start_write(struct super_block *sb, int level, bool wait);
1637
+/*
1638
+ * These are internal functions, please use sb_start_{write,pagefault,intwrite}
1639
+ * instead.
1640
+ */
1641
+static inline void __sb_end_write(struct super_block *sb, int level)
1642
+{
1643
+ percpu_up_read(sb->s_writers.rw_sem + level-1);
1644
+}
1645
+
1646
+static inline void __sb_start_write(struct super_block *sb, int level)
1647
+{
1648
+ percpu_down_read(sb->s_writers.rw_sem + level - 1);
1649
+}
1650
+
1651
+static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
1652
+{
1653
+ return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
1654
+}
15721655
15731656 #define __sb_writers_acquired(sb, lev) \
15741657 percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
....@@ -1632,12 +1715,12 @@
16321715 */
16331716 static inline void sb_start_write(struct super_block *sb)
16341717 {
1635
- __sb_start_write(sb, SB_FREEZE_WRITE, true);
1718
+ __sb_start_write(sb, SB_FREEZE_WRITE);
16361719 }
16371720
1638
-static inline int sb_start_write_trylock(struct super_block *sb)
1721
+static inline bool sb_start_write_trylock(struct super_block *sb)
16391722 {
1640
- return __sb_start_write(sb, SB_FREEZE_WRITE, false);
1723
+ return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
16411724 }
16421725
16431726 /**
....@@ -1653,15 +1736,15 @@
16531736 *
16541737 * Since page fault freeze protection behaves as a lock, users have to preserve
16551738 * ordering of freeze protection and other filesystem locks. It is advised to
1656
- * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
1739
+ * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
16571740 * handling code implies lock dependency:
16581741 *
1659
- * mmap_sem
1742
+ * mmap_lock
16601743 * -> sb_start_pagefault
16611744 */
16621745 static inline void sb_start_pagefault(struct super_block *sb)
16631746 {
1664
- __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
1747
+ __sb_start_write(sb, SB_FREEZE_PAGEFAULT);
16651748 }
16661749
16671750 /*
....@@ -1679,12 +1762,12 @@
16791762 */
16801763 static inline void sb_start_intwrite(struct super_block *sb)
16811764 {
1682
- __sb_start_write(sb, SB_FREEZE_FS, true);
1765
+ __sb_start_write(sb, SB_FREEZE_FS);
16831766 }
16841767
1685
-static inline int sb_start_intwrite_trylock(struct super_block *sb)
1768
+static inline bool sb_start_intwrite_trylock(struct super_block *sb)
16861769 {
1687
- return __sb_start_write(sb, SB_FREEZE_FS, false);
1770
+ return __sb_start_write_trylock(sb, SB_FREEZE_FS);
16881771 }
16891772
16901773
....@@ -1694,22 +1777,18 @@
16941777 * VFS helper functions..
16951778 */
16961779 extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
1697
-extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool);
16981780 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
1699
-extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t);
17001781 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
1701
-extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t);
17021782 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
1703
-extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *);
17041783 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
1705
-extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **);
17061784 extern int vfs_rmdir(struct inode *, struct dentry *);
1707
-extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *);
17081785 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
1709
-extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **);
17101786 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
1711
-extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
1712
-extern int vfs_whiteout(struct inode *, struct dentry *);
1787
+
1788
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
1789
+{
1790
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
1791
+}
17131792
17141793 extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
17151794 int open_flag);
....@@ -1717,11 +1796,19 @@
17171796 int vfs_mkobj(struct dentry *, umode_t,
17181797 int (*f)(struct dentry *, umode_t, void *),
17191798 void *);
1720
-int vfs_mkobj2(struct vfsmount *, struct dentry *, umode_t,
1721
- int (*f)(struct dentry *, umode_t, void *),
1722
- void *);
1799
+
1800
+int vfs_fchown(struct file *file, uid_t user, gid_t group);
1801
+int vfs_fchmod(struct file *file, umode_t mode);
1802
+int vfs_utimes(const struct path *path, struct timespec64 *times);
17231803
17241804 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1805
+
1806
+#ifdef CONFIG_COMPAT
1807
+extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
1808
+ unsigned long arg);
1809
+#else
1810
+#define compat_ptr_ioctl NULL
1811
+#endif
17251812
17261813 /*
17271814 * VFS file helper functions.
....@@ -1729,35 +1816,7 @@
17291816 extern void inode_init_owner(struct inode *inode, const struct inode *dir,
17301817 umode_t mode);
17311818 extern bool may_open_dev(const struct path *path);
1732
-/*
1733
- * VFS FS_IOC_FIEMAP helper definitions.
1734
- */
1735
-struct fiemap_extent_info {
1736
- unsigned int fi_flags; /* Flags as passed from user */
1737
- unsigned int fi_extents_mapped; /* Number of mapped extents */
1738
- unsigned int fi_extents_max; /* Size of fiemap_extent array */
1739
- struct fiemap_extent __user *fi_extents_start; /* Start of
1740
- fiemap_extent array */
1741
-};
1742
-int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1743
- u64 phys, u64 len, u32 flags);
1744
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1745
-
1746
-/*
1747
- * File types
1748
- *
1749
- * NOTE! These match bits 12..15 of stat.st_mode
1750
- * (ie "(i_mode >> 12) & 15").
1751
- */
1752
-#define DT_UNKNOWN 0
1753
-#define DT_FIFO 1
1754
-#define DT_CHR 2
1755
-#define DT_DIR 4
1756
-#define DT_BLK 6
1757
-#define DT_REG 8
1758
-#define DT_LNK 10
1759
-#define DT_SOCK 12
1760
-#define DT_WHT 14
1819
+umode_t mode_strip_sgid(const struct inode *dir, umode_t mode);
17611820
17621821 /*
17631822 * This is the "filldir" function type, used by readdir() to let
....@@ -1773,14 +1832,6 @@
17731832 filldir_t actor;
17741833 loff_t pos;
17751834 };
1776
-
1777
-struct block_device_operations;
1778
-
1779
-/* These macros are for out of kernel modules to test that
1780
- * the kernel supports the unlocked_ioctl and compat_ioctl
1781
- * fields in struct file_operations. */
1782
-#define HAVE_COMPAT_IOCTL 1
1783
-#define HAVE_UNLOCKED_IOCTL 1
17841835
17851836 /*
17861837 * These flags let !MMU mmap() govern direct device mapping vs immediate
....@@ -1801,6 +1852,33 @@
18011852 #define NOMMU_VMFLAGS \
18021853 (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
18031854
1855
+/*
1856
+ * These flags control the behavior of the remap_file_range function pointer.
1857
+ * If it is called with len == 0 that means "remap to end of source file".
1858
+ * See Documentation/filesystems/vfs.rst for more details about this call.
1859
+ *
1860
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
1861
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
1862
+ */
1863
+#define REMAP_FILE_DEDUP (1 << 0)
1864
+#define REMAP_FILE_CAN_SHORTEN (1 << 1)
1865
+
1866
+/*
1867
+ * These flags signal that the caller is ok with altering various aspects of
1868
+ * the behavior of the remap operation. The changes must be made by the
1869
+ * implementation; the vfs remap helper functions can take advantage of them.
1870
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
1871
+ * btrfs clone/dedupe ioctls.
1872
+ */
1873
+#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
1874
+
1875
+/*
1876
+ * These flags control the behavior of vfs_copy_file_range().
1877
+ * They are not available to the user via syscall.
1878
+ *
1879
+ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
1880
+ */
1881
+#define COPY_FILE_SPLICE (1 << 0)
18041882
18051883 struct iov_iter;
18061884
....@@ -1811,6 +1889,7 @@
18111889 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
18121890 ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
18131891 ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
1892
+ int (*iopoll)(struct kiocb *kiocb, bool spin);
18141893 int (*iterate) (struct file *, struct dir_context *);
18151894 int (*iterate_shared) (struct file *, struct dir_context *);
18161895 __poll_t (*poll) (struct file *, struct poll_table_struct *);
....@@ -1839,10 +1918,9 @@
18391918 #endif
18401919 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
18411920 loff_t, size_t, unsigned int);
1842
- int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
1843
- u64);
1844
- int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
1845
- u64);
1921
+ loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
1922
+ struct file *file_out, loff_t pos_out,
1923
+ loff_t len, unsigned int remap_flags);
18461924 int (*fadvise)(struct file *, loff_t, loff_t, int);
18471925
18481926 ANDROID_KABI_RESERVE(1);
....@@ -1855,7 +1933,6 @@
18551933 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
18561934 const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
18571935 int (*permission) (struct inode *, int);
1858
- int (*permission2) (struct vfsmount *, struct inode *, int);
18591936 struct posix_acl * (*get_acl)(struct inode *, int);
18601937
18611938 int (*readlink) (struct dentry *, char __user *,int);
....@@ -1870,8 +1947,7 @@
18701947 int (*rename) (struct inode *, struct dentry *,
18711948 struct inode *, struct dentry *, unsigned int);
18721949 int (*setattr) (struct dentry *, struct iattr *);
1873
- int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *);
1874
- int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
1950
+ int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
18751951 ssize_t (*listxattr) (struct dentry *, char *, size_t);
18761952 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
18771953 u64 len);
....@@ -1905,38 +1981,34 @@
19051981 return file->f_op->mmap(file, vma);
19061982 }
19071983
1908
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
1909
- unsigned long nr_segs, unsigned long fast_segs,
1910
- struct iovec *fast_pointer,
1911
- struct iovec **ret_pointer);
1912
-
1913
-extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
19141984 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
19151985 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
1916
-extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1917
- unsigned long, loff_t *, rwf_t);
19181986 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
19191987 loff_t, size_t, unsigned int);
1920
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1921
- struct inode *inode_out, loff_t pos_out,
1922
- u64 *len, bool is_dedupe);
1923
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
1924
- struct file *file_out, loff_t pos_out, u64 len);
1925
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1926
- struct file *file_out, loff_t pos_out, u64 len);
1927
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1928
- struct inode *dest, loff_t destoff,
1929
- loff_t len, bool *is_same);
1988
+extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
1989
+ struct file *file_out, loff_t pos_out,
1990
+ size_t len, unsigned int flags);
1991
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
1992
+ struct file *file_out, loff_t pos_out,
1993
+ loff_t *count,
1994
+ unsigned int remap_flags);
1995
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
1996
+ struct file *file_out, loff_t pos_out,
1997
+ loff_t len, unsigned int remap_flags);
1998
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1999
+ struct file *file_out, loff_t pos_out,
2000
+ loff_t len, unsigned int remap_flags);
19302001 extern int vfs_dedupe_file_range(struct file *file,
19312002 struct file_dedupe_range *same);
1932
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
1933
- struct file *dst_file, loff_t dst_pos,
1934
- u64 len);
2003
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2004
+ struct file *dst_file, loff_t dst_pos,
2005
+ loff_t len, unsigned int remap_flags);
19352006
19362007
19372008 struct super_operations {
19382009 struct inode *(*alloc_inode)(struct super_block *sb);
19392010 void (*destroy_inode)(struct inode *);
2011
+ void (*free_inode)(struct inode *);
19402012
19412013 void (*dirty_inode) (struct inode *, int flags);
19422014 int (*write_inode) (struct inode *, struct writeback_control *wbc);
....@@ -1950,14 +2022,9 @@
19502022 int (*unfreeze_fs) (struct super_block *);
19512023 int (*statfs) (struct dentry *, struct kstatfs *);
19522024 int (*remount_fs) (struct super_block *, int *, char *);
1953
- int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *);
1954
- void *(*clone_mnt_data) (void *);
1955
- void (*copy_mnt_data) (void *, void *);
19562025 void (*umount_begin) (struct super_block *);
1957
- void (*umount_end)(struct super_block *sb, int flags);
19582026
19592027 int (*show_options)(struct seq_file *, struct dentry *);
1960
- int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *);
19612028 int (*show_devname)(struct seq_file *, struct dentry *);
19622029 int (*show_path)(struct seq_file *, struct dentry *);
19632030 int (*show_stats)(struct seq_file *, struct dentry *);
....@@ -1981,27 +2048,27 @@
19812048 /*
19822049 * Inode flags - they have no relation to superblock flags now
19832050 */
1984
-#define S_SYNC 1 /* Writes are synced at once */
1985
-#define S_NOATIME 2 /* Do not update access times */
1986
-#define S_APPEND 4 /* Append-only file */
1987
-#define S_IMMUTABLE 8 /* Immutable file */
1988
-#define S_DEAD 16 /* removed, but still open directory */
1989
-#define S_NOQUOTA 32 /* Inode is not counted to quota */
1990
-#define S_DIRSYNC 64 /* Directory modifications are synchronous */
1991
-#define S_NOCMTIME 128 /* Do not update file c/mtime */
1992
-#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
1993
-#define S_PRIVATE 512 /* Inode is fs-internal */
1994
-#define S_IMA 1024 /* Inode has an associated IMA struct */
1995
-#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
1996
-#define S_NOSEC 4096 /* no suid or xattr security attributes */
2051
+#define S_SYNC (1 << 0) /* Writes are synced at once */
2052
+#define S_NOATIME (1 << 1) /* Do not update access times */
2053
+#define S_APPEND (1 << 2) /* Append-only file */
2054
+#define S_IMMUTABLE (1 << 3) /* Immutable file */
2055
+#define S_DEAD (1 << 4) /* removed, but still open directory */
2056
+#define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */
2057
+#define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */
2058
+#define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */
2059
+#define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */
2060
+#define S_PRIVATE (1 << 9) /* Inode is fs-internal */
2061
+#define S_IMA (1 << 10) /* Inode has an associated IMA struct */
2062
+#define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */
2063
+#define S_NOSEC (1 << 12) /* no suid or xattr security attributes */
19972064 #ifdef CONFIG_FS_DAX
1998
-#define S_DAX 8192 /* Direct Access, avoiding the page cache */
2065
+#define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */
19992066 #else
2000
-#define S_DAX 0 /* Make all the DAX code disappear */
2067
+#define S_DAX 0 /* Make all the DAX code disappear */
20012068 #endif
2002
-#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
2003
-#define S_CASEFOLD 32768 /* Casefolded file */
2004
-#define S_VERITY 65536 /* Verity file (using fs/verity/) */
2069
+#define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */
2070
+#define S_CASEFOLD (1 << 15) /* Casefolded file */
2071
+#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
20052072
20062073 /*
20072074 * Note that nosuid etc flags are inode-specific: setting some file-system
....@@ -2078,7 +2145,19 @@
20782145 .ki_filp = filp,
20792146 .ki_flags = iocb_flags(filp),
20802147 .ki_hint = ki_hint_validate(file_write_hint(filp)),
2081
- .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0),
2148
+ .ki_ioprio = get_current_ioprio(),
2149
+ };
2150
+}
2151
+
2152
+static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
2153
+ struct file *filp)
2154
+{
2155
+ *kiocb = (struct kiocb) {
2156
+ .ki_filp = filp,
2157
+ .ki_flags = kiocb_src->ki_flags,
2158
+ .ki_hint = kiocb_src->ki_hint,
2159
+ .ki_ioprio = kiocb_src->ki_ioprio,
2160
+ .ki_pos = kiocb_src->ki_pos,
20822161 };
20832162 }
20842163
....@@ -2136,12 +2215,14 @@
21362215 * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
21372216 * synchronize competing switching instances and to tell
21382217 * wb stat updates to grab the i_pages lock. See
2139
- * inode_switch_wb_work_fn() for details.
2218
+ * inode_switch_wbs_work_fn() for details.
21402219 *
21412220 * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
21422221 * and work dirs among overlayfs mounts.
21432222 *
21442223 * I_CREATING New object's inode in the middle of setting up.
2224
+ *
2225
+ * I_DONTCACHE Evict inode as soon as it is not used anymore.
21452226 *
21462227 * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
21472228 * Used to detect that mark_inode_dirty() should not move
....@@ -2167,6 +2248,7 @@
21672248 #define I_WB_SWITCH (1 << 13)
21682249 #define I_OVL_INUSE (1 << 14)
21692250 #define I_CREATING (1 << 15)
2251
+#define I_DONTCACHE (1 << 16)
21702252 #define I_SYNC_QUEUED (1 << 17)
21712253
21722254 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
....@@ -2210,11 +2292,15 @@
22102292
22112293 extern bool atime_needs_update(const struct path *, struct inode *);
22122294 extern void touch_atime(const struct path *);
2295
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
2296
+
22132297 static inline void file_accessed(struct file *file)
22142298 {
22152299 if (!(file->f_flags & O_NOATIME))
22162300 touch_atime(&file->f_path);
22172301 }
2302
+
2303
+extern int file_modified(struct file *file);
22182304
22192305 int sync_inode(struct inode *inode, struct writeback_control *wbc);
22202306 int sync_inode_metadata(struct inode *inode, int wait);
....@@ -2226,12 +2312,13 @@
22262312 #define FS_BINARY_MOUNTDATA 2
22272313 #define FS_HAS_SUBTYPE 4
22282314 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
2315
+#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
2316
+#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */
22292317 #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
2318
+ int (*init_fs_context)(struct fs_context *);
2319
+ const struct fs_parameter_spec *parameters;
22302320 struct dentry *(*mount) (struct file_system_type *, int,
22312321 const char *, void *);
2232
- struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int,
2233
- const char *, void *);
2234
- void *(*alloc_mnt_data) (void);
22352322 void (*kill_sb) (struct super_block *);
22362323 struct module *owner;
22372324 struct file_system_type * next;
....@@ -2254,21 +2341,9 @@
22542341
22552342 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
22562343
2257
-extern struct dentry *mount_ns(struct file_system_type *fs_type,
2258
- int flags, void *data, void *ns, struct user_namespace *user_ns,
2259
- int (*fill_super)(struct super_block *, void *, int));
2260
-#ifdef CONFIG_BLOCK
22612344 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
22622345 int flags, const char *dev_name, void *data,
22632346 int (*fill_super)(struct super_block *, void *, int));
2264
-#else
2265
-static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
2266
- int flags, const char *dev_name, void *data,
2267
- int (*fill_super)(struct super_block *, void *, int))
2268
-{
2269
- return ERR_PTR(-ENODEV);
2270
-}
2271
-#endif
22722347 extern struct dentry *mount_single(struct file_system_type *fs_type,
22732348 int flags, void *data,
22742349 int (*fill_super)(struct super_block *, void *, int));
....@@ -2277,43 +2352,22 @@
22772352 int (*fill_super)(struct super_block *, void *, int));
22782353 extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
22792354 void generic_shutdown_super(struct super_block *sb);
2280
-#ifdef CONFIG_BLOCK
22812355 void kill_block_super(struct super_block *sb);
2282
-#else
2283
-static inline void kill_block_super(struct super_block *sb)
2284
-{
2285
- BUG();
2286
-}
2287
-#endif
22882356 void kill_anon_super(struct super_block *sb);
22892357 void kill_litter_super(struct super_block *sb);
22902358 void deactivate_super(struct super_block *sb);
22912359 void deactivate_locked_super(struct super_block *sb);
22922360 int set_anon_super(struct super_block *s, void *data);
2361
+int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
22932362 int get_anon_bdev(dev_t *);
22942363 void free_anon_bdev(dev_t);
2295
-struct super_block *sget_userns(struct file_system_type *type,
2296
- int (*test)(struct super_block *,void *),
2297
- int (*set)(struct super_block *,void *),
2298
- int flags, struct user_namespace *user_ns,
2299
- void *data);
2364
+struct super_block *sget_fc(struct fs_context *fc,
2365
+ int (*test)(struct super_block *, struct fs_context *),
2366
+ int (*set)(struct super_block *, struct fs_context *));
23002367 struct super_block *sget(struct file_system_type *type,
23012368 int (*test)(struct super_block *,void *),
23022369 int (*set)(struct super_block *,void *),
23032370 int flags, void *data);
2304
-extern struct dentry *mount_pseudo_xattr(struct file_system_type *, char *,
2305
- const struct super_operations *ops,
2306
- const struct xattr_handler **xattr,
2307
- const struct dentry_operations *dops,
2308
- unsigned long);
2309
-
2310
-static inline struct dentry *
2311
-mount_pseudo(struct file_system_type *fs_type, char *name,
2312
- const struct super_operations *ops,
2313
- const struct dentry_operations *dops, unsigned long magic)
2314
-{
2315
- return mount_pseudo_xattr(fs_type, name, ops, NULL, dops, magic);
2316
-}
23172371
23182372 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
23192373 #define fops_get(fops) \
....@@ -2334,8 +2388,7 @@
23342388
23352389 extern int register_filesystem(struct file_system_type *);
23362390 extern int unregister_filesystem(struct file_system_type *);
2337
-extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
2338
-#define kern_mount(type) kern_mount_data(type, NULL)
2391
+extern struct vfsmount *kern_mount(struct file_system_type *);
23392392 extern void kern_unmount(struct vfsmount *mnt);
23402393 extern int may_umount_tree(struct vfsmount *);
23412394 extern int may_umount(struct vfsmount *);
....@@ -2549,18 +2602,18 @@
25492602 struct audit_names *aname;
25502603 const char iname[];
25512604 };
2605
+static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
25522606
25532607 extern long vfs_truncate(const struct path *, loff_t);
25542608 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
25552609 struct file *filp);
2556
-extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start,
2557
- unsigned int time_attrs, struct file *filp);
25582610 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
25592611 loff_t len);
25602612 extern long do_sys_open(int dfd, const char __user *filename, int flags,
25612613 umode_t mode);
25622614 extern struct file *file_open_name(struct filename *, int, umode_t);
25632615 extern struct file *filp_open(const char *, int, umode_t);
2616
+extern struct file *filp_open_block(const char *, int, umode_t);
25642617 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
25652618 const char *, int, umode_t);
25662619 extern struct file * dentry_open(const struct path *, int, const struct cred *);
....@@ -2581,10 +2634,6 @@
25812634 int (*open)(struct inode *, struct file *));
25822635 extern int finish_no_open(struct file *file, struct dentry *dentry);
25832636
2584
-/* fs/ioctl.c */
2585
-
2586
-extern int ioctl_preallocate(struct file *filp, void __user *argp);
2587
-
25882637 /* fs/dcache.c */
25892638 extern void __init vfs_caches_init_early(void);
25902639 extern void __init vfs_caches_init(void);
....@@ -2594,93 +2643,16 @@
25942643 #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
25952644 #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
25962645
2597
-#ifdef CONFIG_BLOCK
2598
-extern int register_blkdev(unsigned int, const char *);
2599
-extern void unregister_blkdev(unsigned int, const char *);
2600
-extern void bdev_unhash_inode(dev_t dev);
2601
-extern struct block_device *bdget(dev_t);
2602
-extern struct block_device *bdgrab(struct block_device *bdev);
2603
-extern void bd_set_size(struct block_device *, loff_t size);
2604
-extern void bd_forget(struct inode *inode);
2605
-extern void bdput(struct block_device *);
2606
-extern void invalidate_bdev(struct block_device *);
2607
-extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
2608
-extern int sync_blockdev(struct block_device *bdev);
2609
-extern void kill_bdev(struct block_device *);
2610
-extern struct super_block *freeze_bdev(struct block_device *);
2611
-extern void emergency_thaw_all(void);
2612
-extern void emergency_thaw_bdev(struct super_block *sb);
2613
-extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2614
-extern int fsync_bdev(struct block_device *);
2615
-
26162646 extern struct super_block *blockdev_superblock;
2617
-
26182647 static inline bool sb_is_blkdev_sb(struct super_block *sb)
26192648 {
2620
- return sb == blockdev_superblock;
2621
-}
2622
-#else
2623
-static inline void bd_forget(struct inode *inode) {}
2624
-static inline int sync_blockdev(struct block_device *bdev) { return 0; }
2625
-static inline void kill_bdev(struct block_device *bdev) {}
2626
-static inline void invalidate_bdev(struct block_device *bdev) {}
2627
-
2628
-static inline struct super_block *freeze_bdev(struct block_device *sb)
2629
-{
2630
- return NULL;
2649
+ return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
26312650 }
26322651
2633
-static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
2634
-{
2635
- return 0;
2636
-}
2637
-
2638
-static inline int emergency_thaw_bdev(struct super_block *sb)
2639
-{
2640
- return 0;
2641
-}
2642
-
2643
-static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
2644
-{
2645
-}
2646
-
2647
-static inline bool sb_is_blkdev_sb(struct super_block *sb)
2648
-{
2649
- return false;
2650
-}
2651
-#endif
2652
+void emergency_thaw_all(void);
26522653 extern int sync_filesystem(struct super_block *);
26532654 extern const struct file_operations def_blk_fops;
26542655 extern const struct file_operations def_chr_fops;
2655
-#ifdef CONFIG_BLOCK
2656
-extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
2657
-extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
2658
-extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
2659
-extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
2660
-extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
2661
- void *holder);
2662
-extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
2663
- void *holder);
2664
-extern void blkdev_put(struct block_device *bdev, fmode_t mode);
2665
-extern int __blkdev_reread_part(struct block_device *bdev);
2666
-extern int blkdev_reread_part(struct block_device *bdev);
2667
-
2668
-#ifdef CONFIG_SYSFS
2669
-extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
2670
-extern void bd_unlink_disk_holder(struct block_device *bdev,
2671
- struct gendisk *disk);
2672
-#else
2673
-static inline int bd_link_disk_holder(struct block_device *bdev,
2674
- struct gendisk *disk)
2675
-{
2676
- return 0;
2677
-}
2678
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
2679
- struct gendisk *disk)
2680
-{
2681
-}
2682
-#endif
2683
-#endif
26842656
26852657 /* fs/char_dev.c */
26862658 #define CHRDEV_MAJOR_MAX 512
....@@ -2711,37 +2683,18 @@
27112683 __unregister_chrdev(major, 0, 256, name);
27122684 }
27132685
2714
-/* fs/block_dev.c */
2715
-#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
2716
-#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
2717
-
2718
-#ifdef CONFIG_BLOCK
2719
-#define BLKDEV_MAJOR_MAX 512
2720
-extern const char *__bdevname(dev_t, char *buffer);
2721
-extern const char *bdevname(struct block_device *bdev, char *buffer);
2722
-extern struct block_device *lookup_bdev(const char *);
2723
-extern void blkdev_show(struct seq_file *,off_t);
2724
-
2725
-#else
2726
-#define BLKDEV_MAJOR_MAX 0
2727
-#endif
2728
-
27292686 extern void init_special_inode(struct inode *, umode_t, dev_t);
27302687
27312688 /* Invalid inode operations -- fs/bad_inode.c */
27322689 extern void make_bad_inode(struct inode *);
27332690 extern bool is_bad_inode(struct inode *);
27342691
2735
-#ifdef CONFIG_BLOCK
2736
-extern void check_disk_size_change(struct gendisk *disk,
2737
- struct block_device *bdev, bool verbose);
2738
-extern int revalidate_disk(struct gendisk *);
2739
-extern int check_disk_change(struct block_device *);
2740
-extern int __invalidate_device(struct block_device *, bool);
2741
-extern int invalidate_partition(struct gendisk *, int);
2742
-#endif
27432692 unsigned long invalidate_mapping_pages(struct address_space *mapping,
27442693 pgoff_t start, pgoff_t end);
2694
+
2695
+void invalidate_mapping_pagevec(struct address_space *mapping,
2696
+ pgoff_t start, pgoff_t end,
2697
+ unsigned long *nr_pagevec);
27452698
27462699 static inline void invalidate_remote_inode(struct inode *inode)
27472700 {
....@@ -2768,7 +2721,6 @@
27682721
27692722 extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
27702723 loff_t lend);
2771
-extern int filemap_write_and_wait(struct address_space *mapping);
27722724 extern int filemap_write_and_wait_range(struct address_space *mapping,
27732725 loff_t lstart, loff_t lend);
27742726 extern int __filemap_fdatawrite_range(struct address_space *mapping,
....@@ -2777,6 +2729,11 @@
27772729 loff_t start, loff_t end);
27782730 extern int filemap_check_errors(struct address_space *mapping);
27792731 extern void __filemap_set_wb_err(struct address_space *mapping, int err);
2732
+
2733
+static inline int filemap_write_and_wait(struct address_space *mapping)
2734
+{
2735
+ return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
2736
+}
27802737
27812738 extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
27822739 loff_t lend);
....@@ -2811,7 +2768,7 @@
28112768 }
28122769
28132770 /**
2814
- * filemap_check_wb_error - has an error occurred since the mark was sampled?
2771
+ * filemap_check_wb_err - has an error occurred since the mark was sampled?
28152772 * @mapping: mapping to check for writeback errors
28162773 * @since: previously-sampled errseq_t
28172774 *
....@@ -2838,9 +2795,24 @@
28382795 return errseq_sample(&mapping->wb_err);
28392796 }
28402797
2798
+/**
2799
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
2800
+ * @file: file pointer to be sampled
2801
+ *
2802
+ * Grab the most current superblock-level errseq_t value for the given
2803
+ * struct file.
2804
+ */
2805
+static inline errseq_t file_sample_sb_err(struct file *file)
2806
+{
2807
+ return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
2808
+}
2809
+
28412810 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
28422811 int datasync);
28432812 extern int vfs_fsync(struct file *file, int datasync);
2813
+
2814
+extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
2815
+ unsigned int flags);
28442816
28452817 /*
28462818 * Sync the bytes written if this was a synchronous write. Expect ki_pos
....@@ -2862,13 +2834,18 @@
28622834
28632835 extern void emergency_sync(void);
28642836 extern void emergency_remount(void);
2837
+
28652838 #ifdef CONFIG_BLOCK
2866
-extern sector_t bmap(struct inode *, sector_t);
2839
+extern int bmap(struct inode *inode, sector_t *block);
2840
+#else
2841
+static inline int bmap(struct inode *inode, sector_t *block)
2842
+{
2843
+ return -EINVAL;
2844
+}
28672845 #endif
2846
+
28682847 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
2869
-extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **);
28702848 extern int inode_permission(struct inode *, int);
2871
-extern int inode_permission2(struct vfsmount *, struct inode *, int);
28722849 extern int generic_permission(struct inode *, int);
28732850 extern int __check_sticky(struct inode *dir, struct inode *inode);
28742851
....@@ -2877,18 +2854,23 @@
28772854 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
28782855 }
28792856
2857
+static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
2858
+{
2859
+ return (inode->i_mode ^ mode) & S_IFMT;
2860
+}
2861
+
28802862 static inline void file_start_write(struct file *file)
28812863 {
28822864 if (!S_ISREG(file_inode(file)->i_mode))
28832865 return;
2884
- __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
2866
+ sb_start_write(file_inode(file)->i_sb);
28852867 }
28862868
28872869 static inline bool file_start_write_trylock(struct file *file)
28882870 {
28892871 if (!S_ISREG(file_inode(file)->i_mode))
28902872 return true;
2891
- return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false);
2873
+ return sb_start_write_trylock(file_inode(file)->i_sb);
28922874 }
28932875
28942876 static inline void file_end_write(struct file *file)
....@@ -2937,7 +2919,7 @@
29372919 return atomic_read(&inode->i_writecount) > 0;
29382920 }
29392921
2940
-#ifdef CONFIG_IMA
2922
+#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
29412923 static inline void i_readcount_dec(struct inode *inode)
29422924 {
29432925 BUG_ON(!atomic_read(&inode->i_readcount));
....@@ -2959,43 +2941,8 @@
29592941 #endif
29602942 extern int do_pipe_flags(int *, int);
29612943
2962
-#define __kernel_read_file_id(id) \
2963
- id(UNKNOWN, unknown) \
2964
- id(FIRMWARE, firmware) \
2965
- id(FIRMWARE_PREALLOC_BUFFER, firmware) \
2966
- id(MODULE, kernel-module) \
2967
- id(KEXEC_IMAGE, kexec-image) \
2968
- id(KEXEC_INITRAMFS, kexec-initramfs) \
2969
- id(POLICY, security-policy) \
2970
- id(X509_CERTIFICATE, x509-certificate) \
2971
- id(MAX_ID, )
2972
-
2973
-#define __fid_enumify(ENUM, dummy) READING_ ## ENUM,
2974
-#define __fid_stringify(dummy, str) #str,
2975
-
2976
-enum kernel_read_file_id {
2977
- __kernel_read_file_id(__fid_enumify)
2978
-};
2979
-
2980
-static const char * const kernel_read_file_str[] = {
2981
- __kernel_read_file_id(__fid_stringify)
2982
-};
2983
-
2984
-static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
2985
-{
2986
- if ((unsigned)id >= READING_MAX_ID)
2987
- return kernel_read_file_str[READING_UNKNOWN];
2988
-
2989
- return kernel_read_file_str[id];
2990
-}
2991
-
2992
-extern int kernel_read_file(struct file *, void **, loff_t *, loff_t,
2993
- enum kernel_read_file_id);
2994
-extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t,
2995
- enum kernel_read_file_id);
2996
-extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
2997
- enum kernel_read_file_id);
29982944 extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
2945
+ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
29992946 extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
30002947 extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
30012948 extern struct file * open_exec(const char *);
....@@ -3024,6 +2971,7 @@
30242971 {
30252972 return !inode->i_nlink || inode_unhashed(inode);
30262973 }
2974
+extern void d_mark_dontcache(struct inode *inode);
30272975
30282976 extern struct inode *ilookup5_nowait(struct super_block *sb,
30292977 unsigned long hashval, int (*test)(struct inode *, void *),
....@@ -3043,6 +2991,9 @@
30432991 int (*match)(struct inode *,
30442992 unsigned long, void *),
30452993 void *data);
2994
+extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
2995
+ int (*)(struct inode *, void *), void *);
2996
+extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
30462997 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
30472998 extern int insert_inode_locked(struct inode *);
30482999 #ifdef CONFIG_DEBUG_LOCK_ALLOC
....@@ -3055,6 +3006,21 @@
30553006 extern unsigned int get_next_ino(void);
30563007 extern void evict_inodes(struct super_block *sb);
30573008
3009
+/*
3010
+ * Userspace may rely on the inode number being non-zero. For example, glibc
3011
+ * simply ignores files with zero i_ino in unlink() and other places.
3012
+ *
3013
+ * As an additional complication, if userspace was compiled with
3014
+ * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
3015
+ * lower 32 bits, so we need to check that those aren't zero explicitly. With
3016
+ * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
3017
+ * better safe than sorry.
3018
+ */
3019
+static inline bool is_zero_ino(ino_t ino)
3020
+{
3021
+ return (u32)ino == 0;
3022
+}
3023
+
30583024 extern void __iget(struct inode * inode);
30593025 extern void iget_failed(struct inode *);
30603026 extern void clear_inode(struct inode *);
....@@ -3062,7 +3028,7 @@
30623028 extern struct inode *new_inode_pseudo(struct super_block *sb);
30633029 extern struct inode *new_inode(struct super_block *sb);
30643030 extern void free_inode_nonrcu(struct inode *inode);
3065
-extern int should_remove_suid(struct dentry *);
3031
+extern int setattr_should_drop_suidgid(struct inode *);
30663032 extern int file_remove_privs(struct file *);
30673033
30683034 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
....@@ -3080,16 +3046,17 @@
30803046
30813047 extern void inode_sb_list_add(struct inode *inode);
30823048
3083
-#ifdef CONFIG_BLOCK
3084
-extern int bdev_read_only(struct block_device *);
3085
-#endif
3086
-extern int set_blocksize(struct block_device *, int);
30873049 extern int sb_set_blocksize(struct super_block *, int);
30883050 extern int sb_min_blocksize(struct super_block *, int);
30893051
30903052 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
30913053 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
30923054 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
3055
+extern int generic_write_check_limits(struct file *file, loff_t pos,
3056
+ loff_t *count);
3057
+extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
3058
+extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
3059
+ struct iov_iter *to, ssize_t already_read);
30933060 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
30943061 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
30953062 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
....@@ -3100,6 +3067,10 @@
31003067 rwf_t flags);
31013068 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
31023069 rwf_t flags);
3070
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
3071
+ struct iov_iter *iter);
3072
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
3073
+ struct iov_iter *iter);
31033074
31043075 /* fs/block_dev.c */
31053076 extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
....@@ -3147,9 +3118,6 @@
31473118 DIO_SKIP_HOLES = 0x02,
31483119 };
31493120
3150
-void dio_end_io(struct bio *bio);
3151
-void dio_warn_stale_pagecache(struct file *filp);
3152
-
31533121 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
31543122 struct block_device *bdev, struct iov_iter *iter,
31553123 get_block_t get_block,
....@@ -3193,6 +3161,11 @@
31933161 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
31943162 }
31953163
3164
+/*
3165
+ * Warn about a page cache invalidation failure during a direct I/O write.
3166
+ */
3167
+void dio_warn_stale_pagecache(struct file *filp);
3168
+
31963169 extern void inode_set_flags(struct inode *inode, unsigned int flags,
31973170 unsigned int mask);
31983171
....@@ -3229,41 +3202,21 @@
32293202
32303203 extern int iterate_dir(struct file *, struct dir_context *);
32313204
3232
-extern int vfs_statx(int, const char __user *, int, struct kstat *, u32);
3233
-extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int);
3205
+int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
3206
+ int flags);
3207
+int vfs_fstat(int fd, struct kstat *stat);
32343208
32353209 static inline int vfs_stat(const char __user *filename, struct kstat *stat)
32363210 {
3237
- return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT,
3238
- stat, STATX_BASIC_STATS);
3211
+ return vfs_fstatat(AT_FDCWD, filename, stat, 0);
32393212 }
32403213 static inline int vfs_lstat(const char __user *name, struct kstat *stat)
32413214 {
3242
- return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT,
3243
- stat, STATX_BASIC_STATS);
3215
+ return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
32443216 }
3245
-static inline int vfs_fstatat(int dfd, const char __user *filename,
3246
- struct kstat *stat, int flags)
3247
-{
3248
- return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
3249
- stat, STATX_BASIC_STATS);
3250
-}
3251
-static inline int vfs_fstat(int fd, struct kstat *stat)
3252
-{
3253
- return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0);
3254
-}
3255
-
32563217
32573218 extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
32583219 extern int vfs_readlink(struct dentry *, char __user *, int);
3259
-
3260
-extern int __generic_block_fiemap(struct inode *inode,
3261
- struct fiemap_extent_info *fieinfo,
3262
- loff_t start, loff_t len,
3263
- get_block_t *get_block);
3264
-extern int generic_block_fiemap(struct inode *inode,
3265
- struct fiemap_extent_info *fieinfo, u64 start,
3266
- u64 len, get_block_t *get_block);
32673220
32683221 extern struct file_system_type *get_filesystem(struct file_system_type *fs);
32693222 extern void put_filesystem(struct file_system_type *fs);
....@@ -3291,6 +3244,8 @@
32913244 extern int simple_rmdir(struct inode *, struct dentry *);
32923245 extern int simple_rename(struct inode *, struct dentry *,
32933246 struct inode *, struct dentry *, unsigned int);
3247
+extern void simple_recursive_removal(struct dentry *,
3248
+ void (*callback)(struct dentry *));
32943249 extern int noop_fsync(struct file *, loff_t, loff_t, int);
32953250 extern int noop_set_page_dirty(struct page *page);
32963251 extern void noop_invalidatepage(struct page *page, unsigned int offset,
....@@ -3332,26 +3287,18 @@
33323287
33333288 extern int generic_check_addressable(unsigned, u64);
33343289
3335
-#ifdef CONFIG_UNICODE
3336
-extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
3337
-extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
3338
- const char *str, const struct qstr *name);
3339
-extern bool needs_casefold(const struct inode *dir);
3340
-#else
3341
-static inline bool needs_casefold(const struct inode *dir)
3342
-{
3343
- return 0;
3344
-}
3345
-#endif
3346
-extern void generic_set_encrypted_ci_d_ops(struct inode *dir,
3347
- struct dentry *dentry);
3290
+extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
33483291
33493292 #ifdef CONFIG_MIGRATION
33503293 extern int buffer_migrate_page(struct address_space *,
33513294 struct page *, struct page *,
33523295 enum migrate_mode);
3296
+extern int buffer_migrate_page_norefs(struct address_space *,
3297
+ struct page *, struct page *,
3298
+ enum migrate_mode);
33533299 #else
33543300 #define buffer_migrate_page NULL
3301
+#define buffer_migrate_page_norefs NULL
33553302 #endif
33563303
33573304 extern int setattr_prepare(struct dentry *, struct iattr *);
....@@ -3360,12 +3307,7 @@
33603307
33613308 extern int file_update_time(struct file *file);
33623309
3363
-static inline bool io_is_direct(struct file *filp)
3364
-{
3365
- return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
3366
-}
3367
-
3368
-static inline bool vma_is_dax(struct vm_area_struct *vma)
3310
+static inline bool vma_is_dax(const struct vm_area_struct *vma)
33693311 {
33703312 return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
33713313 }
....@@ -3374,7 +3316,7 @@
33743316 {
33753317 struct inode *inode;
33763318
3377
- if (!vma->vm_file)
3319
+ if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
33783320 return false;
33793321 if (!vma_is_dax(vma))
33803322 return false;
....@@ -3389,7 +3331,7 @@
33893331 int res = 0;
33903332 if (file->f_flags & O_APPEND)
33913333 res |= IOCB_APPEND;
3392
- if (io_is_direct(file))
3334
+ if (file->f_flags & O_DIRECT)
33933335 res |= IOCB_DIRECT;
33943336 if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
33953337 res |= IOCB_DSYNC;
....@@ -3400,23 +3342,32 @@
34003342
34013343 static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
34023344 {
3345
+ int kiocb_flags = 0;
3346
+
3347
+ /* make sure there's no overlap between RWF and private IOCB flags */
3348
+ BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD);
3349
+
3350
+ if (!flags)
3351
+ return 0;
34033352 if (unlikely(flags & ~RWF_SUPPORTED))
34043353 return -EOPNOTSUPP;
34053354
34063355 if (flags & RWF_NOWAIT) {
34073356 if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
34083357 return -EOPNOTSUPP;
3409
- ki->ki_flags |= IOCB_NOWAIT;
3358
+ kiocb_flags |= IOCB_NOIO;
34103359 }
3411
- if (flags & RWF_HIPRI)
3412
- ki->ki_flags |= IOCB_HIPRI;
3413
- if (flags & RWF_DSYNC)
3414
- ki->ki_flags |= IOCB_DSYNC;
3360
+ kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
34153361 if (flags & RWF_SYNC)
3416
- ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
3417
- if (flags & RWF_APPEND)
3418
- ki->ki_flags |= IOCB_APPEND;
3362
+ kiocb_flags |= IOCB_DSYNC;
3363
+
3364
+ ki->ki_flags |= kiocb_flags;
34193365 return 0;
3366
+}
3367
+
3368
+static inline rwf_t iocb_to_rw_flags(int ifl, int iocb_mask)
3369
+{
3370
+ return ifl & iocb_mask;
34203371 }
34213372
34223373 static inline ino_t parent_ino(struct dentry *dentry)
....@@ -3441,7 +3392,7 @@
34413392 */
34423393 struct simple_transaction_argresp {
34433394 ssize_t size;
3444
- char data[0];
3395
+ char data[];
34453396 };
34463397
34473398 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
....@@ -3470,7 +3421,7 @@
34703421 * All attributes contain a text representation of a numeric value
34713422 * that are accessed with the get() and set() functions.
34723423 */
3473
-#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
3424
+#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
34743425 static int __fops ## _open(struct inode *inode, struct file *file) \
34753426 { \
34763427 __simple_attr_check_format(__fmt, 0ull); \
....@@ -3481,9 +3432,15 @@
34813432 .open = __fops ## _open, \
34823433 .release = simple_attr_release, \
34833434 .read = simple_attr_read, \
3484
- .write = simple_attr_write, \
3435
+ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \
34853436 .llseek = generic_file_llseek, \
34863437 }
3438
+
3439
+#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
3440
+ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
3441
+
3442
+#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \
3443
+ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
34873444
34883445 static inline __printf(1, 2)
34893446 void __simple_attr_check_format(const char *fmt, ...)
....@@ -3499,14 +3456,16 @@
34993456 size_t len, loff_t *ppos);
35003457 ssize_t simple_attr_write(struct file *file, const char __user *buf,
35013458 size_t len, loff_t *ppos);
3459
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
3460
+ size_t len, loff_t *ppos);
35023461
35033462 struct ctl_table;
35043463 int proc_nr_files(struct ctl_table *table, int write,
3505
- void __user *buffer, size_t *lenp, loff_t *ppos);
3464
+ void *buffer, size_t *lenp, loff_t *ppos);
35063465 int proc_nr_dentry(struct ctl_table *table, int write,
3507
- void __user *buffer, size_t *lenp, loff_t *ppos);
3466
+ void *buffer, size_t *lenp, loff_t *ppos);
35083467 int proc_nr_inodes(struct ctl_table *table, int write,
3509
- void __user *buffer, size_t *lenp, loff_t *ppos);
3468
+ void *buffer, size_t *lenp, loff_t *ppos);
35103469 int __init get_filesystem_list(char *buf);
35113470
35123471 #define __FMODE_EXEC ((__force int) FMODE_EXEC)
....@@ -3518,7 +3477,7 @@
35183477
35193478 static inline bool is_sxid(umode_t mode)
35203479 {
3521
- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
3480
+ return mode & (S_ISUID | S_ISGID);
35223481 }
35233482
35243483 static inline int check_sticky(struct inode *dir, struct inode *inode)
....@@ -3590,6 +3549,8 @@
35903549 /* mm/fadvise.c */
35913550 extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
35923551 int advice);
3552
+extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
3553
+ int advice);
35933554
35943555 int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
35953556 unsigned int flags);