hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/fs/libfs.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * fs/libfs.c
34 * Library for filesystems writers.
....@@ -16,6 +17,9 @@
1617 #include <linux/exportfs.h>
1718 #include <linux/writeback.h>
1819 #include <linux/buffer_head.h> /* sync_mapping_buffers */
20
+#include <linux/fs_context.h>
21
+#include <linux/pseudo_fs.h>
22
+#include <linux/fsnotify.h>
1923 #include <linux/unicode.h>
2024 #include <linux/fscrypt.h>
2125
....@@ -91,14 +95,13 @@
9195 /*
9296 * Returns an element of siblings' list.
9397 * We are looking for <count>th positive after <p>; if
94
- * found, dentry is grabbed and passed to caller via *<res>.
95
- * If no such element exists, the anchor of list is returned
96
- * and *<res> is set to NULL.
98
+ * found, dentry is grabbed and returned to caller.
99
+ * If no such element exists, NULL is returned.
97100 */
98
-static struct list_head *scan_positives(struct dentry *cursor,
101
+static struct dentry *scan_positives(struct dentry *cursor,
99102 struct list_head *p,
100103 loff_t count,
101
- struct dentry **res)
104
+ struct dentry *last)
102105 {
103106 struct dentry *dentry = cursor->d_parent, *found = NULL;
104107
....@@ -126,9 +129,8 @@
126129 }
127130 }
128131 spin_unlock(&dentry->d_lock);
129
- dput(*res);
130
- *res = found;
131
- return p;
132
+ dput(last);
133
+ return found;
132134 }
133135
134136 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
....@@ -137,33 +139,32 @@
137139 switch (whence) {
138140 case 1:
139141 offset += file->f_pos;
142
+ fallthrough;
140143 case 0:
141144 if (offset >= 0)
142145 break;
146
+ fallthrough;
143147 default:
144148 return -EINVAL;
145149 }
146150 if (offset != file->f_pos) {
147151 struct dentry *cursor = file->private_data;
148152 struct dentry *to = NULL;
149
- struct list_head *p;
150153
151
- file->f_pos = offset;
152154 inode_lock_shared(dentry->d_inode);
153155
154
- if (file->f_pos > 2) {
155
- p = scan_positives(cursor, &dentry->d_subdirs,
156
- file->f_pos - 2, &to);
157
- spin_lock(&dentry->d_lock);
158
- list_move(&cursor->d_child, p);
159
- spin_unlock(&dentry->d_lock);
160
- } else {
161
- spin_lock(&dentry->d_lock);
156
+ if (offset > 2)
157
+ to = scan_positives(cursor, &dentry->d_subdirs,
158
+ offset - 2, NULL);
159
+ spin_lock(&dentry->d_lock);
160
+ if (to)
161
+ list_move(&cursor->d_child, &to->d_child);
162
+ else
162163 list_del_init(&cursor->d_child);
163
- spin_unlock(&dentry->d_lock);
164
- }
165
-
164
+ spin_unlock(&dentry->d_lock);
166165 dput(to);
166
+
167
+ file->f_pos = offset;
167168
168169 inode_unlock_shared(dentry->d_inode);
169170 }
....@@ -196,17 +197,23 @@
196197
197198 if (ctx->pos == 2)
198199 p = anchor;
199
- else
200
+ else if (!list_empty(&cursor->d_child))
200201 p = &cursor->d_child;
202
+ else
203
+ return 0;
201204
202
- while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
205
+ while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
203206 if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
204207 d_inode(next)->i_ino, dt_type(d_inode(next))))
205208 break;
206209 ctx->pos++;
210
+ p = &next->d_child;
207211 }
208212 spin_lock(&dentry->d_lock);
209
- list_move_tail(&cursor->d_child, p);
213
+ if (next)
214
+ list_move_tail(&cursor->d_child, &next->d_child);
215
+ else
216
+ list_del_init(&cursor->d_child);
210217 spin_unlock(&dentry->d_lock);
211218 dput(next);
212219
....@@ -218,7 +225,7 @@
218225 {
219226 return -EISDIR;
220227 }
221
-EXPORT_SYMBOL(generic_read_dir);
228
+EXPORT_SYMBOL_NS(generic_read_dir, ANDROID_GKI_VFS_EXPORT_ONLY);
222229
223230 const struct file_operations simple_dir_operations = {
224231 .open = dcache_dir_open,
....@@ -235,38 +242,95 @@
235242 };
236243 EXPORT_SYMBOL(simple_dir_inode_operations);
237244
245
+static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
246
+{
247
+ struct dentry *child = NULL;
248
+ struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;
249
+
250
+ spin_lock(&parent->d_lock);
251
+ while ((p = p->next) != &parent->d_subdirs) {
252
+ struct dentry *d = container_of(p, struct dentry, d_child);
253
+ if (simple_positive(d)) {
254
+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
255
+ if (simple_positive(d))
256
+ child = dget_dlock(d);
257
+ spin_unlock(&d->d_lock);
258
+ if (likely(child))
259
+ break;
260
+ }
261
+ }
262
+ spin_unlock(&parent->d_lock);
263
+ dput(prev);
264
+ return child;
265
+}
266
+
267
+void simple_recursive_removal(struct dentry *dentry,
268
+ void (*callback)(struct dentry *))
269
+{
270
+ struct dentry *this = dget(dentry);
271
+ while (true) {
272
+ struct dentry *victim = NULL, *child;
273
+ struct inode *inode = this->d_inode;
274
+
275
+ inode_lock(inode);
276
+ if (d_is_dir(this))
277
+ inode->i_flags |= S_DEAD;
278
+ while ((child = find_next_child(this, victim)) == NULL) {
279
+ // kill and ascend
280
+ // update metadata while it's still locked
281
+ inode->i_ctime = current_time(inode);
282
+ clear_nlink(inode);
283
+ inode_unlock(inode);
284
+ victim = this;
285
+ this = this->d_parent;
286
+ inode = this->d_inode;
287
+ inode_lock(inode);
288
+ if (simple_positive(victim)) {
289
+ d_invalidate(victim); // avoid lost mounts
290
+ if (d_is_dir(victim))
291
+ fsnotify_rmdir(inode, victim);
292
+ else
293
+ fsnotify_unlink(inode, victim);
294
+ if (callback)
295
+ callback(victim);
296
+ dput(victim); // unpin it
297
+ }
298
+ if (victim == dentry) {
299
+ inode->i_ctime = inode->i_mtime =
300
+ current_time(inode);
301
+ if (d_is_dir(dentry))
302
+ drop_nlink(inode);
303
+ inode_unlock(inode);
304
+ dput(dentry);
305
+ return;
306
+ }
307
+ }
308
+ inode_unlock(inode);
309
+ this = child;
310
+ }
311
+}
312
+EXPORT_SYMBOL(simple_recursive_removal);
313
+
238314 static const struct super_operations simple_super_operations = {
239315 .statfs = simple_statfs,
240316 };
241317
242
-/*
243
- * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
244
- * will never be mountable)
245
- */
246
-struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name,
247
- const struct super_operations *ops, const struct xattr_handler **xattr,
248
- const struct dentry_operations *dops, unsigned long magic)
318
+static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
249319 {
250
- struct super_block *s;
251
- struct dentry *dentry;
320
+ struct pseudo_fs_context *ctx = fc->fs_private;
252321 struct inode *root;
253
- struct qstr d_name = QSTR_INIT(name, strlen(name));
254
-
255
- s = sget_userns(fs_type, NULL, set_anon_super, SB_KERNMOUNT|SB_NOUSER,
256
- &init_user_ns, NULL);
257
- if (IS_ERR(s))
258
- return ERR_CAST(s);
259322
260323 s->s_maxbytes = MAX_LFS_FILESIZE;
261324 s->s_blocksize = PAGE_SIZE;
262325 s->s_blocksize_bits = PAGE_SHIFT;
263
- s->s_magic = magic;
264
- s->s_op = ops ? ops : &simple_super_operations;
265
- s->s_xattr = xattr;
326
+ s->s_magic = ctx->magic;
327
+ s->s_op = ctx->ops ?: &simple_super_operations;
328
+ s->s_xattr = ctx->xattr;
266329 s->s_time_gran = 1;
267330 root = new_inode(s);
268331 if (!root)
269
- goto Enomem;
332
+ return -ENOMEM;
333
+
270334 /*
271335 * since this is the first inode, make it number 1. New inodes created
272336 * after this must take care not to collide with it (by passing
....@@ -275,22 +339,48 @@
275339 root->i_ino = 1;
276340 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
277341 root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
278
- dentry = __d_alloc(s, &d_name);
279
- if (!dentry) {
280
- iput(root);
281
- goto Enomem;
282
- }
283
- d_instantiate(dentry, root);
284
- s->s_root = dentry;
285
- s->s_d_op = dops;
286
- s->s_flags |= SB_ACTIVE;
287
- return dget(s->s_root);
288
-
289
-Enomem:
290
- deactivate_locked_super(s);
291
- return ERR_PTR(-ENOMEM);
342
+ s->s_root = d_make_root(root);
343
+ if (!s->s_root)
344
+ return -ENOMEM;
345
+ s->s_d_op = ctx->dops;
346
+ return 0;
292347 }
293
-EXPORT_SYMBOL(mount_pseudo_xattr);
348
+
349
+static int pseudo_fs_get_tree(struct fs_context *fc)
350
+{
351
+ return get_tree_nodev(fc, pseudo_fs_fill_super);
352
+}
353
+
354
+static void pseudo_fs_free(struct fs_context *fc)
355
+{
356
+ kfree(fc->fs_private);
357
+}
358
+
359
+static const struct fs_context_operations pseudo_fs_context_ops = {
360
+ .free = pseudo_fs_free,
361
+ .get_tree = pseudo_fs_get_tree,
362
+};
363
+
364
+/*
365
+ * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
366
+ * will never be mountable)
367
+ */
368
+struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
369
+ unsigned long magic)
370
+{
371
+ struct pseudo_fs_context *ctx;
372
+
373
+ ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
374
+ if (likely(ctx)) {
375
+ ctx->magic = magic;
376
+ fc->fs_private = ctx;
377
+ fc->ops = &pseudo_fs_context_ops;
378
+ fc->sb_flags |= SB_NOUSER;
379
+ fc->global = true;
380
+ }
381
+ return ctx;
382
+}
383
+EXPORT_SYMBOL(init_pseudo);
294384
295385 int simple_open(struct inode *inode, struct file *file)
296386 {
....@@ -455,8 +545,7 @@
455545
456546 /**
457547 * simple_write_end - .write_end helper for non-block-device FSes
458
- * @available: See .write_end of address_space_operations
459
- * @file: "
548
+ * @file: See .write_end of address_space_operations
460549 * @mapping: "
461550 * @pos: "
462551 * @len: "
....@@ -1028,7 +1117,7 @@
10281117 err = __generic_file_fsync(file, start, end, datasync);
10291118 if (err)
10301119 return err;
1031
- return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
1120
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
10321121 }
10331122 EXPORT_SYMBOL(generic_file_fsync);
10341123
....@@ -1125,11 +1214,6 @@
11251214 return 0;
11261215 };
11271216
1128
-/*
1129
- * A single inode exists for all anon_inode files. Contrary to pipes,
1130
- * anon_inode inodes have no associated per-instance data, so we need
1131
- * only allocate one of them.
1132
- */
11331217 struct inode *alloc_anon_inode(struct super_block *s)
11341218 {
11351219 static const struct address_space_operations anon_aops = {
....@@ -1177,6 +1261,20 @@
11771261 }
11781262 EXPORT_SYMBOL(simple_nosetlease);
11791263
1264
+/**
1265
+ * simple_get_link - generic helper to get the target of "fast" symlinks
1266
+ * @dentry: not used here
1267
+ * @inode: the symlink inode
1268
+ * @done: not used here
1269
+ *
1270
+ * Generic helper for filesystems to use for symlink inodes where a pointer to
1271
+ * the symlink target is stored in ->i_link. NOTE: this isn't normally called,
1272
+ * since as an optimization the path lookup code uses any non-NULL ->i_link
1273
+ * directly, without calling ->get_link(). But ->get_link() still must be set,
1274
+ * to mark the inode_operations as being for a symlink.
1275
+ *
1276
+ * Return: the symlink target
1277
+ */
11801278 const char *simple_get_link(struct dentry *dentry, struct inode *inode,
11811279 struct delayed_call *done)
11821280 {
....@@ -1266,27 +1364,38 @@
12661364 }
12671365
12681366 #ifdef CONFIG_UNICODE
1269
-bool needs_casefold(const struct inode *dir)
1367
+/*
1368
+ * Determine if the name of a dentry should be casefolded.
1369
+ *
1370
+ * Return: if names will need casefolding
1371
+ */
1372
+static bool needs_casefold(const struct inode *dir)
12701373 {
1271
- return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
1272
- (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
1374
+ return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
12731375 }
1274
-EXPORT_SYMBOL(needs_casefold);
12751376
1276
-int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
1277
- const char *str, const struct qstr *name)
1377
+/**
1378
+ * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
1379
+ * @dentry: dentry whose name we are checking against
1380
+ * @len: len of name of dentry
1381
+ * @str: str pointer to name of dentry
1382
+ * @name: Name to compare against
1383
+ *
1384
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
1385
+ */
1386
+static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
1387
+ const char *str, const struct qstr *name)
12781388 {
12791389 const struct dentry *parent = READ_ONCE(dentry->d_parent);
1280
- const struct inode *inode = READ_ONCE(parent->d_inode);
1390
+ const struct inode *dir = READ_ONCE(parent->d_inode);
12811391 const struct super_block *sb = dentry->d_sb;
12821392 const struct unicode_map *um = sb->s_encoding;
1283
- struct qstr entry = QSTR_INIT(str, len);
1393
+ struct qstr qstr = QSTR_INIT(str, len);
12841394 char strbuf[DNAME_INLINE_LEN];
12851395 int ret;
12861396
1287
- if (!inode || !needs_casefold(inode))
1397
+ if (!dir || !needs_casefold(dir))
12881398 goto fallback;
1289
-
12901399 /*
12911400 * If the dentry name is stored in-line, then it may be concurrently
12921401 * modified by a rename. If this happens, the VFS will eventually retry
....@@ -1297,47 +1406,44 @@
12971406 if (len <= DNAME_INLINE_LEN - 1) {
12981407 memcpy(strbuf, str, len);
12991408 strbuf[len] = 0;
1300
- entry.name = strbuf;
1409
+ qstr.name = strbuf;
13011410 /* prevent compiler from optimizing out the temporary buffer */
13021411 barrier();
13031412 }
1304
-
1305
- ret = utf8_strncasecmp(um, name, &entry);
1413
+ ret = utf8_strncasecmp(um, name, &qstr);
13061414 if (ret >= 0)
13071415 return ret;
13081416
1309
- if (sb_has_enc_strict_mode(sb))
1417
+ if (sb_has_strict_encoding(sb))
13101418 return -EINVAL;
13111419 fallback:
13121420 if (len != name->len)
13131421 return 1;
13141422 return !!memcmp(str, name->name, len);
13151423 }
1316
-EXPORT_SYMBOL(generic_ci_d_compare);
13171424
1318
-int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
1425
+/**
1426
+ * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
1427
+ * @dentry: dentry of the parent directory
1428
+ * @str: qstr of name whose hash we should fill in
1429
+ *
1430
+ * Return: 0 if hash was successful or unchanged, and -EINVAL on error
1431
+ */
1432
+static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
13191433 {
1320
- const struct inode *inode = READ_ONCE(dentry->d_inode);
1434
+ const struct inode *dir = READ_ONCE(dentry->d_inode);
13211435 struct super_block *sb = dentry->d_sb;
13221436 const struct unicode_map *um = sb->s_encoding;
13231437 int ret = 0;
13241438
1325
- if (!inode || !needs_casefold(inode))
1439
+ if (!dir || !needs_casefold(dir))
13261440 return 0;
13271441
13281442 ret = utf8_casefold_hash(um, dentry, str);
1329
- if (ret < 0)
1330
- goto err;
1331
-
1443
+ if (ret < 0 && sb_has_strict_encoding(sb))
1444
+ return -EINVAL;
13321445 return 0;
1333
-err:
1334
- if (sb_has_enc_strict_mode(sb))
1335
- ret = -EINVAL;
1336
- else
1337
- ret = 0;
1338
- return ret;
13391446 }
1340
-EXPORT_SYMBOL(generic_ci_d_hash);
13411447
13421448 static const struct dentry_operations generic_ci_dentry_ops = {
13431449 .d_hash = generic_ci_d_hash,
....@@ -1351,7 +1457,7 @@
13511457 };
13521458 #endif
13531459
1354
-#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
1460
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
13551461 static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
13561462 .d_hash = generic_ci_d_hash,
13571463 .d_compare = generic_ci_d_compare,
....@@ -1361,28 +1467,48 @@
13611467
13621468 /**
13631469 * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
1364
- * @dir: parent of dentry whose ops to set
1365
- * @dentry: detnry to set ops on
1470
+ * @dentry: dentry to set ops on
13661471 *
1367
- * This function sets the dentry ops for the given dentry to handle both
1368
- * casefolding and encryption of the dentry name.
1472
+ * Casefolded directories need d_hash and d_compare set, so that the dentries
1473
+ * contained in them are handled case-insensitively. Note that these operations
1474
+ * are needed on the parent directory rather than on the dentries in it, and
1475
+ * while the casefolding flag can be toggled on and off on an empty directory,
1476
+ * dentry_operations can't be changed later. As a result, if the filesystem has
1477
+ * casefolding support enabled at all, we have to give all dentries the
1478
+ * casefolding operations even if their inode doesn't have the casefolding flag
1479
+ * currently (and thus the casefolding ops would be no-ops for now).
1480
+ *
1481
+ * Encryption works differently in that the only dentry operation it needs is
1482
+ * d_revalidate, which it only needs on dentries that have the no-key name flag.
1483
+ * The no-key flag can't be set "later", so we don't have to worry about that.
1484
+ *
1485
+ * Finally, to maximize compatibility with overlayfs (which isn't compatible
1486
+ * with certain dentry operations) and to avoid taking an unnecessary
1487
+ * performance hit, we use custom dentry_operations for each possible
1488
+ * combination rather than always installing all operations.
13691489 */
1370
-void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry)
1490
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
13711491 {
13721492 #ifdef CONFIG_FS_ENCRYPTION
1373
- if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
1374
-#ifdef CONFIG_UNICODE
1375
- if (dir->i_sb->s_encoding) {
1376
- d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
1377
- return;
1378
- }
1493
+ bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
13791494 #endif
1495
+#ifdef CONFIG_UNICODE
1496
+ bool needs_ci_ops = dentry->d_sb->s_encoding;
1497
+#endif
1498
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
1499
+ if (needs_encrypt_ops && needs_ci_ops) {
1500
+ d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
1501
+ return;
1502
+ }
1503
+#endif
1504
+#ifdef CONFIG_FS_ENCRYPTION
1505
+ if (needs_encrypt_ops) {
13801506 d_set_d_op(dentry, &generic_encrypted_dentry_ops);
13811507 return;
13821508 }
13831509 #endif
13841510 #ifdef CONFIG_UNICODE
1385
- if (dir->i_sb->s_encoding) {
1511
+ if (needs_ci_ops) {
13861512 d_set_d_op(dentry, &generic_ci_dentry_ops);
13871513 return;
13881514 }