hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/libfs.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * fs/libfs.c
34 * Library for filesystems writers.
....@@ -16,6 +17,9 @@
1617 #include <linux/exportfs.h>
1718 #include <linux/writeback.h>
1819 #include <linux/buffer_head.h> /* sync_mapping_buffers */
20
+#include <linux/fs_context.h>
21
+#include <linux/pseudo_fs.h>
22
+#include <linux/fsnotify.h>
1923 #include <linux/unicode.h>
2024 #include <linux/fscrypt.h>
2125
....@@ -91,14 +95,13 @@
9195 /*
9296 * Returns an element of siblings' list.
9397 * We are looking for <count>th positive after <p>; if
94
- * found, dentry is grabbed and passed to caller via *<res>.
95
- * If no such element exists, the anchor of list is returned
96
- * and *<res> is set to NULL.
98
+ * found, dentry is grabbed and returned to caller.
99
+ * If no such element exists, NULL is returned.
97100 */
98
-static struct list_head *scan_positives(struct dentry *cursor,
101
+static struct dentry *scan_positives(struct dentry *cursor,
99102 struct list_head *p,
100103 loff_t count,
101
- struct dentry **res)
104
+ struct dentry *last)
102105 {
103106 struct dentry *dentry = cursor->d_parent, *found = NULL;
104107
....@@ -126,9 +129,8 @@
126129 }
127130 }
128131 spin_unlock(&dentry->d_lock);
129
- dput(*res);
130
- *res = found;
131
- return p;
132
+ dput(last);
133
+ return found;
132134 }
133135
134136 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
....@@ -137,33 +139,32 @@
137139 switch (whence) {
138140 case 1:
139141 offset += file->f_pos;
142
+ fallthrough;
140143 case 0:
141144 if (offset >= 0)
142145 break;
146
+ fallthrough;
143147 default:
144148 return -EINVAL;
145149 }
146150 if (offset != file->f_pos) {
147151 struct dentry *cursor = file->private_data;
148152 struct dentry *to = NULL;
149
- struct list_head *p;
150153
151
- file->f_pos = offset;
152154 inode_lock_shared(dentry->d_inode);
153155
154
- if (file->f_pos > 2) {
155
- p = scan_positives(cursor, &dentry->d_subdirs,
156
- file->f_pos - 2, &to);
157
- spin_lock(&dentry->d_lock);
158
- list_move(&cursor->d_child, p);
159
- spin_unlock(&dentry->d_lock);
160
- } else {
161
- spin_lock(&dentry->d_lock);
156
+ if (offset > 2)
157
+ to = scan_positives(cursor, &dentry->d_subdirs,
158
+ offset - 2, NULL);
159
+ spin_lock(&dentry->d_lock);
160
+ if (to)
161
+ list_move(&cursor->d_child, &to->d_child);
162
+ else
162163 list_del_init(&cursor->d_child);
163
- spin_unlock(&dentry->d_lock);
164
- }
165
-
164
+ spin_unlock(&dentry->d_lock);
166165 dput(to);
166
+
167
+ file->f_pos = offset;
167168
168169 inode_unlock_shared(dentry->d_inode);
169170 }
....@@ -196,17 +197,23 @@
196197
197198 if (ctx->pos == 2)
198199 p = anchor;
199
- else
200
+ else if (!list_empty(&cursor->d_child))
200201 p = &cursor->d_child;
202
+ else
203
+ return 0;
201204
202
- while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
205
+ while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
203206 if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
204207 d_inode(next)->i_ino, dt_type(d_inode(next))))
205208 break;
206209 ctx->pos++;
210
+ p = &next->d_child;
207211 }
208212 spin_lock(&dentry->d_lock);
209
- list_move_tail(&cursor->d_child, p);
213
+ if (next)
214
+ list_move_tail(&cursor->d_child, &next->d_child);
215
+ else
216
+ list_del_init(&cursor->d_child);
210217 spin_unlock(&dentry->d_lock);
211218 dput(next);
212219
....@@ -218,7 +225,7 @@
218225 {
219226 return -EISDIR;
220227 }
221
-EXPORT_SYMBOL(generic_read_dir);
228
+EXPORT_SYMBOL_NS(generic_read_dir, ANDROID_GKI_VFS_EXPORT_ONLY);
222229
223230 const struct file_operations simple_dir_operations = {
224231 .open = dcache_dir_open,
....@@ -235,38 +242,95 @@
235242 };
236243 EXPORT_SYMBOL(simple_dir_inode_operations);
237244
245
+static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
246
+{
247
+ struct dentry *child = NULL;
248
+ struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;
249
+
250
+ spin_lock(&parent->d_lock);
251
+ while ((p = p->next) != &parent->d_subdirs) {
252
+ struct dentry *d = container_of(p, struct dentry, d_child);
253
+ if (simple_positive(d)) {
254
+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
255
+ if (simple_positive(d))
256
+ child = dget_dlock(d);
257
+ spin_unlock(&d->d_lock);
258
+ if (likely(child))
259
+ break;
260
+ }
261
+ }
262
+ spin_unlock(&parent->d_lock);
263
+ dput(prev);
264
+ return child;
265
+}
266
+
267
+void simple_recursive_removal(struct dentry *dentry,
268
+ void (*callback)(struct dentry *))
269
+{
270
+ struct dentry *this = dget(dentry);
271
+ while (true) {
272
+ struct dentry *victim = NULL, *child;
273
+ struct inode *inode = this->d_inode;
274
+
275
+ inode_lock(inode);
276
+ if (d_is_dir(this))
277
+ inode->i_flags |= S_DEAD;
278
+ while ((child = find_next_child(this, victim)) == NULL) {
279
+ // kill and ascend
280
+ // update metadata while it's still locked
281
+ inode->i_ctime = current_time(inode);
282
+ clear_nlink(inode);
283
+ inode_unlock(inode);
284
+ victim = this;
285
+ this = this->d_parent;
286
+ inode = this->d_inode;
287
+ inode_lock(inode);
288
+ if (simple_positive(victim)) {
289
+ d_invalidate(victim); // avoid lost mounts
290
+ if (d_is_dir(victim))
291
+ fsnotify_rmdir(inode, victim);
292
+ else
293
+ fsnotify_unlink(inode, victim);
294
+ if (callback)
295
+ callback(victim);
296
+ dput(victim); // unpin it
297
+ }
298
+ if (victim == dentry) {
299
+ inode->i_ctime = inode->i_mtime =
300
+ current_time(inode);
301
+ if (d_is_dir(dentry))
302
+ drop_nlink(inode);
303
+ inode_unlock(inode);
304
+ dput(dentry);
305
+ return;
306
+ }
307
+ }
308
+ inode_unlock(inode);
309
+ this = child;
310
+ }
311
+}
312
+EXPORT_SYMBOL(simple_recursive_removal);
313
+
238314 static const struct super_operations simple_super_operations = {
239315 .statfs = simple_statfs,
240316 };
241317
242
-/*
243
- * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
244
- * will never be mountable)
245
- */
246
-struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
247
- const struct super_operations *ops, const struct xattr_handler **xattr,
248
- const struct dentry_operations *dops, unsigned long magic)
318
+static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
249319 {
250
- struct super_block *s;
251
- struct dentry *dentry;
320
+ struct pseudo_fs_context *ctx = fc->fs_private;
252321 struct inode *root;
253
- struct qstr d_name = QSTR_INIT(name, strlen(name));
254
-
255
- s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER,
256
- &init_user_ns, NULL);
257
- if (IS_ERR(s))
258
- return ERR_CAST(s);
259322
260323 s->s_maxbytes = MAX_LFS_FILESIZE;
261324 s->s_blocksize = PAGE_SIZE;
262325 s->s_blocksize_bits = PAGE_SHIFT;
263
- s->s_magic = magic;
264
- s->s_op = ops ? ops : &simple_super_operations;
265
- s->s_xattr = xattr;
326
+ s->s_magic = ctx->magic;
327
+ s->s_op = ctx->ops ?: &simple_super_operations;
328
+ s->s_xattr = ctx->xattr;
266329 s->s_time_gran = 1;
267330 root = new_inode(s);
268331 if (!root)
269
- goto Enomem;
332
+ return -ENOMEM;
333
+
270334 /*
271335 * since this is the first inode, make it number 1. New inodes created
272336 * after this must take care not to collide with it (by passing
....@@ -275,22 +339,48 @@
275339 root->i_ino = 1;
276340 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
277341 root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
278
- dentry = __d_alloc(s, &d_name);
279
- if (!dentry) {
280
- iput(root);
281
- goto Enomem;
282
- }
283
- d_instantiate(dentry, root);
284
- s->s_root = dentry;
285
- s->s_d_op = dops;
286
- s->s_flags |= SB_ACTIVE;
287
- return dget(s->s_root);
288
-
289
-Enomem:
290
- deactivate_locked_super(s);
291
- return ERR_PTR(-ENOMEM);
342
+ s->s_root = d_make_root(root);
343
+ if (!s->s_root)
344
+ return -ENOMEM;
345
+ s->s_d_op = ctx->dops;
346
+ return 0;
292347 }
293
-EXPORT_SYMBOL(mount_pseudo_xattr);
348
+
349
+static int pseudo_fs_get_tree(struct fs_context *fc)
350
+{
351
+ return get_tree_nodev(fc, pseudo_fs_fill_super);
352
+}
353
+
354
+static void pseudo_fs_free(struct fs_context *fc)
355
+{
356
+ kfree(fc->fs_private);
357
+}
358
+
359
+static const struct fs_context_operations pseudo_fs_context_ops = {
360
+ .free = pseudo_fs_free,
361
+ .get_tree = pseudo_fs_get_tree,
362
+};
363
+
364
+/*
365
+ * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
366
+ * will never be mountable)
367
+ */
368
+struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
369
+ unsigned long magic)
370
+{
371
+ struct pseudo_fs_context *ctx;
372
+
373
+ ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
374
+ if (likely(ctx)) {
375
+ ctx->magic = magic;
376
+ fc->fs_private = ctx;
377
+ fc->ops = &pseudo_fs_context_ops;
378
+ fc->sb_flags |= SB_NOUSER;
379
+ fc->global = true;
380
+ }
381
+ return ctx;
382
+}
383
+EXPORT_SYMBOL(init_pseudo);
294384
295385 int simple_open(struct inode *inode, struct file *file)
296386 {
....@@ -455,8 +545,7 @@
455545
456546 /**
457547 * simple_write_end - .write_end helper for non-block-device FSes
458
- * @available: See .write_end of address_space_operations
459
- * @file: "
548
+ * @file: See .write_end of address_space_operations
460549 * @mapping: "
461550 * @pos: "
462551 * @len: "
....@@ -866,8 +955,8 @@
866955 EXPORT_SYMBOL_GPL(simple_attr_read);
867956
868957 /* interpret the buffer as a number to call the set function with */
869
-ssize_t simple_attr_write(struct file *file, const char __user *buf,
870
- size_t len, loff_t *ppos)
958
+static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
959
+ size_t len, loff_t *ppos, bool is_signed)
871960 {
872961 struct simple_attr *attr;
873962 unsigned long long val;
....@@ -888,7 +977,10 @@
888977 goto out;
889978
890979 attr->set_buf[size] = '\0';
891
- ret = kstrtoull(attr->set_buf, 0, &val);
980
+ if (is_signed)
981
+ ret = kstrtoll(attr->set_buf, 0, &val);
982
+ else
983
+ ret = kstrtoull(attr->set_buf, 0, &val);
892984 if (ret)
893985 goto out;
894986 ret = attr->set(attr->data, val);
....@@ -898,7 +990,20 @@
898990 mutex_unlock(&attr->mutex);
899991 return ret;
900992 }
993
+
994
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
995
+ size_t len, loff_t *ppos)
996
+{
997
+ return simple_attr_write_xsigned(file, buf, len, ppos, false);
998
+}
901999 EXPORT_SYMBOL_GPL(simple_attr_write);
1000
+
1001
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
1002
+ size_t len, loff_t *ppos)
1003
+{
1004
+ return simple_attr_write_xsigned(file, buf, len, ppos, true);
1005
+}
1006
+EXPORT_SYMBOL_GPL(simple_attr_write_signed);
9021007
9031008 /**
9041009 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
....@@ -1028,7 +1133,7 @@
10281133 err = __generic_file_fsync(file, start, end, datasync);
10291134 if (err)
10301135 return err;
1031
- return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
1136
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
10321137 }
10331138 EXPORT_SYMBOL(generic_file_fsync);
10341139
....@@ -1125,11 +1230,6 @@
11251230 return 0;
11261231 };
11271232
1128
-/*
1129
- * A single inode exists for all anon_inode files. Contrary to pipes,
1130
- * anon_inode inodes have no associated per-instance data, so we need
1131
- * only allocate one of them.
1132
- */
11331233 struct inode *alloc_anon_inode(struct super_block *s)
11341234 {
11351235 static const struct address_space_operations anon_aops = {
....@@ -1177,6 +1277,20 @@
11771277 }
11781278 EXPORT_SYMBOL(simple_nosetlease);
11791279
1280
+/**
1281
+ * simple_get_link - generic helper to get the target of "fast" symlinks
1282
+ * @dentry: not used here
1283
+ * @inode: the symlink inode
1284
+ * @done: not used here
1285
+ *
1286
+ * Generic helper for filesystems to use for symlink inodes where a pointer to
1287
+ * the symlink target is stored in ->i_link. NOTE: this isn't normally called,
1288
+ * since as an optimization the path lookup code uses any non-NULL ->i_link
1289
+ * directly, without calling ->get_link(). But ->get_link() still must be set,
1290
+ * to mark the inode_operations as being for a symlink.
1291
+ *
1292
+ * Return: the symlink target
1293
+ */
11801294 const char *simple_get_link(struct dentry *dentry, struct inode *inode,
11811295 struct delayed_call *done)
11821296 {
....@@ -1266,27 +1380,38 @@
12661380 }
12671381
12681382 #ifdef CONFIG_UNICODE
1269
-bool needs_casefold(const struct inode *dir)
1383
+/*
1384
+ * Determine if the name of a dentry should be casefolded.
1385
+ *
1386
+ * Return: if names will need casefolding
1387
+ */
1388
+static bool needs_casefold(const struct inode *dir)
12701389 {
1271
- return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
1272
- (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
1390
+ return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
12731391 }
1274
-EXPORT_SYMBOL(needs_casefold);
12751392
1276
-int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
1277
- const char *str, const struct qstr *name)
1393
+/**
1394
+ * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
1395
+ * @dentry: dentry whose name we are checking against
1396
+ * @len: len of name of dentry
1397
+ * @str: str pointer to name of dentry
1398
+ * @name: Name to compare against
1399
+ *
1400
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
1401
+ */
1402
+static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
1403
+ const char *str, const struct qstr *name)
12781404 {
12791405 const struct dentry *parent = READ_ONCE(dentry->d_parent);
1280
- const struct inode *inode = READ_ONCE(parent->d_inode);
1406
+ const struct inode *dir = READ_ONCE(parent->d_inode);
12811407 const struct super_block *sb = dentry->d_sb;
12821408 const struct unicode_map *um = sb->s_encoding;
1283
- struct qstr entry = QSTR_INIT(str, len);
1409
+ struct qstr qstr = QSTR_INIT(str, len);
12841410 char strbuf[DNAME_INLINE_LEN];
12851411 int ret;
12861412
1287
- if (!inode || !needs_casefold(inode))
1413
+ if (!dir || !needs_casefold(dir))
12881414 goto fallback;
1289
-
12901415 /*
12911416 * If the dentry name is stored in-line, then it may be concurrently
12921417 * modified by a rename. If this happens, the VFS will eventually retry
....@@ -1297,47 +1422,44 @@
12971422 if (len <= DNAME_INLINE_LEN - 1) {
12981423 memcpy(strbuf, str, len);
12991424 strbuf[len] = 0;
1300
- entry.name = strbuf;
1425
+ qstr.name = strbuf;
13011426 /* prevent compiler from optimizing out the temporary buffer */
13021427 barrier();
13031428 }
1304
-
1305
- ret = utf8_strncasecmp(um, name, &entry);
1429
+ ret = utf8_strncasecmp(um, name, &qstr);
13061430 if (ret >= 0)
13071431 return ret;
13081432
1309
- if (sb_has_enc_strict_mode(sb))
1433
+ if (sb_has_strict_encoding(sb))
13101434 return -EINVAL;
13111435 fallback:
13121436 if (len != name->len)
13131437 return 1;
13141438 return !!memcmp(str, name->name, len);
13151439 }
1316
-EXPORT_SYMBOL(generic_ci_d_compare);
13171440
1318
-int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
1441
+/**
1442
+ * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
1443
+ * @dentry: dentry of the parent directory
1444
+ * @str: qstr of name whose hash we should fill in
1445
+ *
1446
+ * Return: 0 if hash was successful or unchanged, and -EINVAL on error
1447
+ */
1448
+static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
13191449 {
1320
- const struct inode *inode = READ_ONCE(dentry->d_inode);
1450
+ const struct inode *dir = READ_ONCE(dentry->d_inode);
13211451 struct super_block *sb = dentry->d_sb;
13221452 const struct unicode_map *um = sb->s_encoding;
13231453 int ret = 0;
13241454
1325
- if (!inode || !needs_casefold(inode))
1455
+ if (!dir || !needs_casefold(dir))
13261456 return 0;
13271457
13281458 ret = utf8_casefold_hash(um, dentry, str);
1329
- if (ret < 0)
1330
- goto err;
1331
-
1459
+ if (ret < 0 && sb_has_strict_encoding(sb))
1460
+ return -EINVAL;
13321461 return 0;
1333
-err:
1334
- if (sb_has_enc_strict_mode(sb))
1335
- ret = -EINVAL;
1336
- else
1337
- ret = 0;
1338
- return ret;
13391462 }
1340
-EXPORT_SYMBOL(generic_ci_d_hash);
13411463
13421464 static const struct dentry_operations generic_ci_dentry_ops = {
13431465 .d_hash = generic_ci_d_hash,
....@@ -1351,7 +1473,7 @@
13511473 };
13521474 #endif
13531475
1354
-#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
1476
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
13551477 static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
13561478 .d_hash = generic_ci_d_hash,
13571479 .d_compare = generic_ci_d_compare,
....@@ -1361,28 +1483,48 @@
13611483
13621484 /**
13631485 * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
1364
- * @dir: parent of dentry whose ops to set
1365
- * @dentry: detnry to set ops on
1486
+ * @dentry: dentry to set ops on
13661487 *
1367
- * This function sets the dentry ops for the given dentry to handle both
1368
- * casefolding and encryption of the dentry name.
1488
+ * Casefolded directories need d_hash and d_compare set, so that the dentries
1489
+ * contained in them are handled case-insensitively. Note that these operations
1490
+ * are needed on the parent directory rather than on the dentries in it, and
1491
+ * while the casefolding flag can be toggled on and off on an empty directory,
1492
+ * dentry_operations can't be changed later. As a result, if the filesystem has
1493
+ * casefolding support enabled at all, we have to give all dentries the
1494
+ * casefolding operations even if their inode doesn't have the casefolding flag
1495
+ * currently (and thus the casefolding ops would be no-ops for now).
1496
+ *
1497
+ * Encryption works differently in that the only dentry operation it needs is
1498
+ * d_revalidate, which it only needs on dentries that have the no-key name flag.
1499
+ * The no-key flag can't be set "later", so we don't have to worry about that.
1500
+ *
1501
+ * Finally, to maximize compatibility with overlayfs (which isn't compatible
1502
+ * with certain dentry operations) and to avoid taking an unnecessary
1503
+ * performance hit, we use custom dentry_operations for each possible
1504
+ * combination rather than always installing all operations.
13691505 */
1370
-void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry)
1506
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
13711507 {
13721508 #ifdef CONFIG_FS_ENCRYPTION
1373
- if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
1374
-#ifdef CONFIG_UNICODE
1375
- if (dir->i_sb->s_encoding) {
1376
- d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
1377
- return;
1378
- }
1509
+ bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
13791510 #endif
1511
+#ifdef CONFIG_UNICODE
1512
+ bool needs_ci_ops = dentry->d_sb->s_encoding;
1513
+#endif
1514
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
1515
+ if (needs_encrypt_ops && needs_ci_ops) {
1516
+ d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
1517
+ return;
1518
+ }
1519
+#endif
1520
+#ifdef CONFIG_FS_ENCRYPTION
1521
+ if (needs_encrypt_ops) {
13801522 d_set_d_op(dentry, &generic_encrypted_dentry_ops);
13811523 return;
13821524 }
13831525 #endif
13841526 #ifdef CONFIG_UNICODE
1385
- if (dir->i_sb->s_encoding) {
1527
+ if (needs_ci_ops) {
13861528 d_set_d_op(dentry, &generic_ci_dentry_ops);
13871529 return;
13881530 }