| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * |
|---|
| 3 | 4 | * Copyright (C) 2011 Novell Inc. |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 6 | | - * under the terms of the GNU General Public License version 2 as published by |
|---|
| 7 | | - * the Free Software Foundation. |
|---|
| 8 | 5 | */ |
|---|
| 9 | 6 | |
|---|
| 10 | 7 | #include <linux/fs.h> |
|---|
| .. | .. |
|---|
| 13 | 10 | #include <linux/xattr.h> |
|---|
| 14 | 11 | #include <linux/posix_acl.h> |
|---|
| 15 | 12 | #include <linux/ratelimit.h> |
|---|
| 13 | +#include <linux/fiemap.h> |
|---|
| 16 | 14 | #include "overlayfs.h" |
|---|
| 17 | 15 | |
|---|
| 18 | 16 | |
|---|
| .. | .. |
|---|
| 61 | 59 | if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) |
|---|
| 62 | 60 | attr->ia_valid &= ~ATTR_MODE; |
|---|
| 63 | 61 | |
|---|
| 62 | + /* |
|---|
| 63 | + * We might have to translate ovl file into real file object |
|---|
| 64 | + * once use cases emerge. For now, simply don't let underlying |
|---|
| 65 | + * filesystem rely on attr->ia_file |
|---|
| 66 | + */ |
|---|
| 67 | + attr->ia_valid &= ~ATTR_FILE; |
|---|
| 68 | + |
|---|
| 69 | + /* |
|---|
| 70 | + * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN |
|---|
| 71 | + * set. Overlayfs does not pass O_TRUNC flag to underlying |
|---|
| 72 | + * filesystem during open -> do not pass ATTR_OPEN. This |
|---|
| 73 | + * disables optimization in fuse which assumes open(O_TRUNC) |
|---|
| 74 | + * already set file size to 0. But we never passed O_TRUNC to |
|---|
| 75 | + * fuse. So by clearing ATTR_OPEN, fuse will be forced to send |
|---|
| 76 | + * setattr request to server. |
|---|
| 77 | + */ |
|---|
| 78 | + attr->ia_valid &= ~ATTR_OPEN; |
|---|
| 79 | + |
|---|
| 64 | 80 | inode_lock(upperdentry->d_inode); |
|---|
| 65 | 81 | old_cred = ovl_override_creds(dentry->d_sb); |
|---|
| 66 | 82 | err = notify_change(upperdentry, attr, NULL); |
|---|
| 67 | | - ovl_revert_creds(old_cred); |
|---|
| 83 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 68 | 84 | if (!err) |
|---|
| 69 | 85 | ovl_copyattr(upperdentry->d_inode, dentry->d_inode); |
|---|
| 70 | 86 | inode_unlock(upperdentry->d_inode); |
|---|
| .. | .. |
|---|
| 78 | 94 | return err; |
|---|
| 79 | 95 | } |
|---|
| 80 | 96 | |
|---|
| 81 | | -static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, |
|---|
| 82 | | - struct ovl_layer *lower_layer) |
|---|
| 97 | +static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) |
|---|
| 83 | 98 | { |
|---|
| 84 | | - bool samefs = ovl_same_sb(dentry->d_sb); |
|---|
| 99 | + bool samefs = ovl_same_fs(dentry->d_sb); |
|---|
| 85 | 100 | unsigned int xinobits = ovl_xino_bits(dentry->d_sb); |
|---|
| 101 | + unsigned int xinoshift = 64 - xinobits; |
|---|
| 86 | 102 | |
|---|
| 87 | 103 | if (samefs) { |
|---|
| 88 | 104 | /* |
|---|
| .. | .. |
|---|
| 93 | 109 | stat->dev = dentry->d_sb->s_dev; |
|---|
| 94 | 110 | return 0; |
|---|
| 95 | 111 | } else if (xinobits) { |
|---|
| 96 | | - unsigned int shift = 64 - xinobits; |
|---|
| 97 | 112 | /* |
|---|
| 98 | 113 | * All inode numbers of underlying fs should not be using the |
|---|
| 99 | 114 | * high xinobits, so we use high xinobits to partition the |
|---|
| 100 | 115 | * overlay st_ino address space. The high bits holds the fsid |
|---|
| 101 | | - * (upper fsid is 0). This way overlay inode numbers are unique |
|---|
| 102 | | - * and all inodes use overlay st_dev. Inode numbers are also |
|---|
| 103 | | - * persistent for a given layer configuration. |
|---|
| 116 | + * (upper fsid is 0). The lowest xinobit is reserved for mapping |
|---|
| 117 | + * the non-persistent inode numbers range in case of overflow. |
|---|
| 118 | + * This way all overlay inode numbers are unique and use the |
|---|
| 119 | + * overlay st_dev. |
|---|
| 104 | 120 | */ |
|---|
| 105 | | - if (stat->ino >> shift) { |
|---|
| 106 | | - pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", |
|---|
| 107 | | - dentry, stat->ino, xinobits); |
|---|
| 108 | | - } else { |
|---|
| 109 | | - if (lower_layer) |
|---|
| 110 | | - stat->ino |= ((u64)lower_layer->fsid) << shift; |
|---|
| 111 | | - |
|---|
| 121 | + if (likely(!(stat->ino >> xinoshift))) { |
|---|
| 122 | + stat->ino |= ((u64)fsid) << (xinoshift + 1); |
|---|
| 112 | 123 | stat->dev = dentry->d_sb->s_dev; |
|---|
| 113 | 124 | return 0; |
|---|
| 125 | + } else if (ovl_xino_warn(dentry->d_sb)) { |
|---|
| 126 | + pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", |
|---|
| 127 | + dentry, stat->ino, xinobits); |
|---|
| 114 | 128 | } |
|---|
| 115 | 129 | } |
|---|
| 116 | 130 | |
|---|
| .. | .. |
|---|
| 127 | 141 | */ |
|---|
| 128 | 142 | stat->dev = dentry->d_sb->s_dev; |
|---|
| 129 | 143 | stat->ino = dentry->d_inode->i_ino; |
|---|
| 130 | | - } else if (lower_layer && lower_layer->fsid) { |
|---|
| 144 | + } else { |
|---|
| 131 | 145 | /* |
|---|
| 132 | 146 | * For non-samefs setup, if we cannot map all layers st_ino |
|---|
| 133 | 147 | * to a unified address space, we need to make sure that st_dev |
|---|
| 134 | | - * is unique per lower fs. Upper layer uses real st_dev and |
|---|
| 135 | | - * lower layers use the unique anonymous bdev assigned to the |
|---|
| 136 | | - * lower fs. |
|---|
| 148 | + * is unique per underlying fs, so we use the unique anonymous |
|---|
| 149 | + * bdev assigned to the underlying fs. |
|---|
| 137 | 150 | */ |
|---|
| 138 | | - stat->dev = lower_layer->fs->pseudo_dev; |
|---|
| 151 | + stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev; |
|---|
| 139 | 152 | } |
|---|
| 140 | 153 | |
|---|
| 141 | 154 | return 0; |
|---|
| .. | .. |
|---|
| 149 | 162 | struct path realpath; |
|---|
| 150 | 163 | const struct cred *old_cred; |
|---|
| 151 | 164 | bool is_dir = S_ISDIR(dentry->d_inode->i_mode); |
|---|
| 152 | | - bool samefs = ovl_same_sb(dentry->d_sb); |
|---|
| 153 | | - struct ovl_layer *lower_layer = NULL; |
|---|
| 165 | + int fsid = 0; |
|---|
| 154 | 166 | int err; |
|---|
| 155 | 167 | bool metacopy_blocks = false; |
|---|
| 156 | 168 | |
|---|
| .. | .. |
|---|
| 171 | 183 | * If lower filesystem supports NFS file handles, this also guarantees |
|---|
| 172 | 184 | * persistent st_ino across mount cycle. |
|---|
| 173 | 185 | */ |
|---|
| 174 | | - if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { |
|---|
| 186 | + if (!is_dir || ovl_same_dev(dentry->d_sb)) { |
|---|
| 175 | 187 | if (!OVL_TYPE_UPPER(type)) { |
|---|
| 176 | | - lower_layer = ovl_layer_lower(dentry); |
|---|
| 188 | + fsid = ovl_layer_lower(dentry)->fsid; |
|---|
| 177 | 189 | } else if (OVL_TYPE_ORIGIN(type)) { |
|---|
| 178 | 190 | struct kstat lowerstat; |
|---|
| 179 | 191 | u32 lowermask = STATX_INO | STATX_BLOCKS | |
|---|
| .. | .. |
|---|
| 203 | 215 | if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || |
|---|
| 204 | 216 | (!ovl_verify_lower(dentry->d_sb) && |
|---|
| 205 | 217 | (is_dir || lowerstat.nlink == 1))) { |
|---|
| 206 | | - lower_layer = ovl_layer_lower(dentry); |
|---|
| 207 | | - /* |
|---|
| 208 | | - * Cannot use origin st_dev;st_ino because |
|---|
| 209 | | - * origin inode content may differ from overlay |
|---|
| 210 | | - * inode content. |
|---|
| 211 | | - */ |
|---|
| 212 | | - if (samefs || lower_layer->fsid) |
|---|
| 213 | | - stat->ino = lowerstat.ino; |
|---|
| 218 | + fsid = ovl_layer_lower(dentry)->fsid; |
|---|
| 219 | + stat->ino = lowerstat.ino; |
|---|
| 214 | 220 | } |
|---|
| 215 | 221 | |
|---|
| 216 | 222 | /* |
|---|
| .. | .. |
|---|
| 244 | 250 | } |
|---|
| 245 | 251 | } |
|---|
| 246 | 252 | |
|---|
| 247 | | - err = ovl_map_dev_ino(dentry, stat, lower_layer); |
|---|
| 253 | + err = ovl_map_dev_ino(dentry, stat, fsid); |
|---|
| 248 | 254 | if (err) |
|---|
| 249 | 255 | goto out; |
|---|
| 250 | 256 | |
|---|
| .. | .. |
|---|
| 266 | 272 | stat->nlink = dentry->d_inode->i_nlink; |
|---|
| 267 | 273 | |
|---|
| 268 | 274 | out: |
|---|
| 269 | | - ovl_revert_creds(old_cred); |
|---|
| 275 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 270 | 276 | |
|---|
| 271 | 277 | return err; |
|---|
| 272 | 278 | } |
|---|
| .. | .. |
|---|
| 300 | 306 | mask |= MAY_READ; |
|---|
| 301 | 307 | } |
|---|
| 302 | 308 | err = inode_permission(realinode, mask); |
|---|
| 303 | | - ovl_revert_creds(old_cred); |
|---|
| 309 | + ovl_revert_creds(inode->i_sb, old_cred); |
|---|
| 304 | 310 | |
|---|
| 305 | 311 | return err; |
|---|
| 306 | 312 | } |
|---|
| .. | .. |
|---|
| 317 | 323 | |
|---|
| 318 | 324 | old_cred = ovl_override_creds(dentry->d_sb); |
|---|
| 319 | 325 | p = vfs_get_link(ovl_dentry_real(dentry), done); |
|---|
| 320 | | - ovl_revert_creds(old_cred); |
|---|
| 326 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 321 | 327 | return p; |
|---|
| 322 | 328 | } |
|---|
| 323 | 329 | |
|---|
| 324 | | -bool ovl_is_private_xattr(const char *name) |
|---|
| 330 | +bool ovl_is_private_xattr(struct super_block *sb, const char *name) |
|---|
| 325 | 331 | { |
|---|
| 326 | 332 | return strncmp(name, OVL_XATTR_PREFIX, |
|---|
| 327 | 333 | sizeof(OVL_XATTR_PREFIX) - 1) == 0; |
|---|
| .. | .. |
|---|
| 362 | 368 | WARN_ON(flags != XATTR_REPLACE); |
|---|
| 363 | 369 | err = vfs_removexattr(realdentry, name); |
|---|
| 364 | 370 | } |
|---|
| 365 | | - ovl_revert_creds(old_cred); |
|---|
| 371 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 366 | 372 | |
|---|
| 367 | 373 | /* copy c/mtime */ |
|---|
| 368 | 374 | ovl_copyattr(d_inode(realdentry), inode); |
|---|
| .. | .. |
|---|
| 373 | 379 | return err; |
|---|
| 374 | 380 | } |
|---|
| 375 | 381 | |
|---|
| 376 | | -int __ovl_xattr_get(struct dentry *dentry, struct inode *inode, |
|---|
| 377 | | - const char *name, void *value, size_t size) |
|---|
| 378 | | -{ |
|---|
| 379 | | - ssize_t res; |
|---|
| 380 | | - const struct cred *old_cred; |
|---|
| 381 | | - struct dentry *realdentry = |
|---|
| 382 | | - ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); |
|---|
| 383 | | - |
|---|
| 384 | | - old_cred = ovl_override_creds(dentry->d_sb); |
|---|
| 385 | | - res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value, |
|---|
| 386 | | - size); |
|---|
| 387 | | - ovl_revert_creds(old_cred); |
|---|
| 388 | | - return res; |
|---|
| 389 | | -} |
|---|
| 390 | | - |
|---|
| 391 | 382 | int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, |
|---|
| 392 | | - void *value, size_t size) |
|---|
| 383 | + void *value, size_t size, int flags) |
|---|
| 393 | 384 | { |
|---|
| 394 | 385 | ssize_t res; |
|---|
| 395 | 386 | const struct cred *old_cred; |
|---|
| .. | .. |
|---|
| 397 | 388 | ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); |
|---|
| 398 | 389 | |
|---|
| 399 | 390 | old_cred = ovl_override_creds(dentry->d_sb); |
|---|
| 400 | | - res = vfs_getxattr(realdentry, name, value, size); |
|---|
| 401 | | - ovl_revert_creds(old_cred); |
|---|
| 391 | + res = __vfs_getxattr(realdentry, d_inode(realdentry), name, |
|---|
| 392 | + value, size, flags); |
|---|
| 393 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 402 | 394 | return res; |
|---|
| 403 | 395 | } |
|---|
| 404 | 396 | |
|---|
| 405 | | -static bool ovl_can_list(const char *s) |
|---|
| 397 | +static bool ovl_can_list(struct super_block *sb, const char *s) |
|---|
| 406 | 398 | { |
|---|
| 399 | + /* Never list private (.overlay) */ |
|---|
| 400 | + if (ovl_is_private_xattr(sb, s)) |
|---|
| 401 | + return false; |
|---|
| 402 | + |
|---|
| 407 | 403 | /* List all non-trusted xattrs */ |
|---|
| 408 | 404 | if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) |
|---|
| 409 | 405 | return true; |
|---|
| 410 | 406 | |
|---|
| 411 | | - /* Never list trusted.overlay, list other trusted for superuser only */ |
|---|
| 412 | | - return !ovl_is_private_xattr(s) && |
|---|
| 413 | | - ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); |
|---|
| 407 | + /* list other trusted for superuser only */ |
|---|
| 408 | + return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); |
|---|
| 414 | 409 | } |
|---|
| 415 | 410 | |
|---|
| 416 | 411 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) |
|---|
| .. | .. |
|---|
| 423 | 418 | |
|---|
| 424 | 419 | old_cred = ovl_override_creds(dentry->d_sb); |
|---|
| 425 | 420 | res = vfs_listxattr(realdentry, list, size); |
|---|
| 426 | | - ovl_revert_creds(old_cred); |
|---|
| 421 | + ovl_revert_creds(dentry->d_sb, old_cred); |
|---|
| 427 | 422 | if (res <= 0 || size == 0) |
|---|
| 428 | 423 | return res; |
|---|
| 429 | 424 | |
|---|
| .. | .. |
|---|
| 436 | 431 | return -EIO; |
|---|
| 437 | 432 | |
|---|
| 438 | 433 | len -= slen; |
|---|
| 439 | | - if (!ovl_can_list(s)) { |
|---|
| 434 | + if (!ovl_can_list(dentry->d_sb, s)) { |
|---|
| 440 | 435 | res -= slen; |
|---|
| 441 | 436 | memmove(s, s + slen, len); |
|---|
| 442 | 437 | } else { |
|---|
| .. | .. |
|---|
| 458 | 453 | |
|---|
| 459 | 454 | old_cred = ovl_override_creds(inode->i_sb); |
|---|
| 460 | 455 | acl = get_acl(realinode, type); |
|---|
| 461 | | - ovl_revert_creds(old_cred); |
|---|
| 456 | + ovl_revert_creds(inode->i_sb, old_cred); |
|---|
| 462 | 457 | |
|---|
| 463 | 458 | return acl; |
|---|
| 464 | 459 | } |
|---|
| .. | .. |
|---|
| 468 | 463 | if (flags & S_ATIME) { |
|---|
| 469 | 464 | struct ovl_fs *ofs = inode->i_sb->s_fs_info; |
|---|
| 470 | 465 | struct path upperpath = { |
|---|
| 471 | | - .mnt = ofs->upper_mnt, |
|---|
| 466 | + .mnt = ovl_upper_mnt(ofs), |
|---|
| 472 | 467 | .dentry = ovl_upperdentry_dereference(OVL_I(inode)), |
|---|
| 473 | 468 | }; |
|---|
| 474 | 469 | |
|---|
| .. | .. |
|---|
| 491 | 486 | return -EOPNOTSUPP; |
|---|
| 492 | 487 | |
|---|
| 493 | 488 | old_cred = ovl_override_creds(inode->i_sb); |
|---|
| 494 | | - |
|---|
| 495 | | - if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) |
|---|
| 496 | | - filemap_write_and_wait(realinode->i_mapping); |
|---|
| 497 | | - |
|---|
| 498 | 489 | err = realinode->i_op->fiemap(realinode, fieinfo, start, len); |
|---|
| 499 | | - ovl_revert_creds(old_cred); |
|---|
| 490 | + ovl_revert_creds(inode->i_sb, old_cred); |
|---|
| 500 | 491 | |
|---|
| 501 | 492 | return err; |
|---|
| 502 | 493 | } |
|---|
| .. | .. |
|---|
| 535 | 526 | |
|---|
| 536 | 527 | /* |
|---|
| 537 | 528 | * It is possible to stack overlayfs instance on top of another |
|---|
| 538 | | - * overlayfs instance as lower layer. We need to annonate the |
|---|
| 529 | + * overlayfs instance as lower layer. We need to annotate the |
|---|
| 539 | 530 | * stackable i_mutex locks according to stack level of the super |
|---|
| 540 | 531 | * block instance. An overlayfs instance can never be in stack |
|---|
| 541 | 532 | * depth 0 (there is always a real fs below it). An overlayfs |
|---|
| .. | .. |
|---|
| 547 | 538 | * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) |
|---|
| 548 | 539 | * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) |
|---|
| 549 | 540 | * [...] &type->i_mutex_dir_key (stack_depth=0) |
|---|
| 541 | + * |
|---|
| 542 | + * Locking order w.r.t ovl_want_write() is important for nested overlayfs. |
|---|
| 543 | + * |
|---|
| 544 | + * This chain is valid: |
|---|
| 545 | + * - inode->i_rwsem (inode_lock[2]) |
|---|
| 546 | + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) |
|---|
| 547 | + * - OVL_I(inode)->lock (ovl_inode_lock[2]) |
|---|
| 548 | + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) |
|---|
| 549 | + * |
|---|
| 550 | + * And this chain is valid: |
|---|
| 551 | + * - inode->i_rwsem (inode_lock[2]) |
|---|
| 552 | + * - OVL_I(inode)->lock (ovl_inode_lock[2]) |
|---|
| 553 | + * - lowerinode->i_rwsem (inode_lock[1]) |
|---|
| 554 | + * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) |
|---|
| 555 | + * |
|---|
| 556 | + * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is |
|---|
| 557 | + * held, because it is in reverse order of the non-nested case using the same |
|---|
| 558 | + * upper fs: |
|---|
| 559 | + * - inode->i_rwsem (inode_lock[1]) |
|---|
| 560 | + * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) |
|---|
| 561 | + * - OVL_I(inode)->lock (ovl_inode_lock[1]) |
|---|
| 550 | 562 | */ |
|---|
| 551 | 563 | #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH |
|---|
| 552 | 564 | |
|---|
| .. | .. |
|---|
| 571 | 583 | #endif |
|---|
| 572 | 584 | } |
|---|
| 573 | 585 | |
|---|
| 574 | | -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, |
|---|
| 575 | | - unsigned long ino, int fsid) |
|---|
| 586 | +static void ovl_next_ino(struct inode *inode) |
|---|
| 587 | +{ |
|---|
| 588 | + struct ovl_fs *ofs = inode->i_sb->s_fs_info; |
|---|
| 589 | + |
|---|
| 590 | + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); |
|---|
| 591 | + if (unlikely(!inode->i_ino)) |
|---|
| 592 | + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); |
|---|
| 593 | +} |
|---|
| 594 | + |
|---|
| 595 | +static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) |
|---|
| 576 | 596 | { |
|---|
| 577 | 597 | int xinobits = ovl_xino_bits(inode->i_sb); |
|---|
| 598 | + unsigned int xinoshift = 64 - xinobits; |
|---|
| 578 | 599 | |
|---|
| 579 | 600 | /* |
|---|
| 580 | 601 | * When d_ino is consistent with st_ino (samefs or i_ino has enough |
|---|
| 581 | 602 | * bits to encode layer), set the same value used for st_ino to i_ino, |
|---|
| 582 | 603 | * so inode number exposed via /proc/locks and the like will be |
|---|
| 583 | 604 | * consistent with d_ino and st_ino values. An i_ino value inconsistent |
|---|
| 584 | | - * with d_ino also causes nfsd readdirplus to fail. When called from |
|---|
| 585 | | - * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real |
|---|
| 586 | | - * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). |
|---|
| 605 | + * with d_ino also causes nfsd readdirplus to fail. |
|---|
| 587 | 606 | */ |
|---|
| 588 | | - if (ovl_same_sb(inode->i_sb) || xinobits) { |
|---|
| 589 | | - inode->i_ino = ino; |
|---|
| 590 | | - if (xinobits && fsid && !(ino >> (64 - xinobits))) |
|---|
| 591 | | - inode->i_ino |= (unsigned long)fsid << (64 - xinobits); |
|---|
| 592 | | - } else { |
|---|
| 593 | | - inode->i_ino = get_next_ino(); |
|---|
| 607 | + inode->i_ino = ino; |
|---|
| 608 | + if (ovl_same_fs(inode->i_sb)) { |
|---|
| 609 | + return; |
|---|
| 610 | + } else if (xinobits && likely(!(ino >> xinoshift))) { |
|---|
| 611 | + inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); |
|---|
| 612 | + return; |
|---|
| 594 | 613 | } |
|---|
| 614 | + |
|---|
| 615 | + /* |
|---|
| 616 | + * For directory inodes on non-samefs with xino disabled or xino |
|---|
| 617 | + * overflow, we allocate a non-persistent inode number, to be used for |
|---|
| 618 | + * resolving st_ino collisions in ovl_map_dev_ino(). |
|---|
| 619 | + * |
|---|
| 620 | + * To avoid ino collision with legitimate xino values from upper |
|---|
| 621 | + * layer (fsid 0), use the lowest xinobit to map the non |
|---|
| 622 | + * persistent inode numbers to the unified st_ino address space. |
|---|
| 623 | + */ |
|---|
| 624 | + if (S_ISDIR(inode->i_mode)) { |
|---|
| 625 | + ovl_next_ino(inode); |
|---|
| 626 | + if (xinobits) { |
|---|
| 627 | + inode->i_ino &= ~0UL >> xinobits; |
|---|
| 628 | + inode->i_ino |= 1UL << xinoshift; |
|---|
| 629 | + } |
|---|
| 630 | + } |
|---|
| 631 | +} |
|---|
| 632 | + |
|---|
| 633 | +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, |
|---|
| 634 | + unsigned long ino, int fsid) |
|---|
| 635 | +{ |
|---|
| 636 | + struct inode *realinode; |
|---|
| 637 | + |
|---|
| 638 | + if (oip->upperdentry) |
|---|
| 639 | + OVL_I(inode)->__upperdentry = oip->upperdentry; |
|---|
| 640 | + if (oip->lowerpath && oip->lowerpath->dentry) |
|---|
| 641 | + OVL_I(inode)->lower = igrab(d_inode(oip->lowerpath->dentry)); |
|---|
| 642 | + if (oip->lowerdata) |
|---|
| 643 | + OVL_I(inode)->lowerdata = igrab(d_inode(oip->lowerdata)); |
|---|
| 644 | + |
|---|
| 645 | + realinode = ovl_inode_real(inode); |
|---|
| 646 | + ovl_copyattr(realinode, inode); |
|---|
| 647 | + ovl_copyflags(realinode, inode); |
|---|
| 648 | + ovl_map_ino(inode, ino, fsid); |
|---|
| 649 | +} |
|---|
| 650 | + |
|---|
| 651 | +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) |
|---|
| 652 | +{ |
|---|
| 595 | 653 | inode->i_mode = mode; |
|---|
| 596 | 654 | inode->i_flags |= S_NOCMTIME; |
|---|
| 597 | 655 | #ifdef CONFIG_FS_POSIX_ACL |
|---|
| .. | .. |
|---|
| 670 | 728 | if (WARN_ON(len >= sizeof(buf))) |
|---|
| 671 | 729 | return -EIO; |
|---|
| 672 | 730 | |
|---|
| 673 | | - return ovl_do_setxattr(ovl_dentry_upper(dentry), |
|---|
| 674 | | - OVL_XATTR_NLINK, buf, len, 0); |
|---|
| 731 | + return ovl_do_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry), |
|---|
| 732 | + OVL_XATTR_NLINK, buf, len); |
|---|
| 675 | 733 | } |
|---|
| 676 | 734 | |
|---|
| 677 | 735 | int ovl_set_nlink_upper(struct dentry *dentry) |
|---|
| .. | .. |
|---|
| 684 | 742 | return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); |
|---|
| 685 | 743 | } |
|---|
| 686 | 744 | |
|---|
| 687 | | -unsigned int ovl_get_nlink(struct dentry *lowerdentry, |
|---|
| 745 | +unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, |
|---|
| 688 | 746 | struct dentry *upperdentry, |
|---|
| 689 | 747 | unsigned int fallback) |
|---|
| 690 | 748 | { |
|---|
| .. | .. |
|---|
| 696 | 754 | if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) |
|---|
| 697 | 755 | return fallback; |
|---|
| 698 | 756 | |
|---|
| 699 | | - err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); |
|---|
| 757 | + err = ovl_do_getxattr(ofs, upperdentry, OVL_XATTR_NLINK, |
|---|
| 758 | + &buf, sizeof(buf) - 1); |
|---|
| 700 | 759 | if (err < 0) |
|---|
| 701 | 760 | goto fail; |
|---|
| 702 | 761 | |
|---|
| .. | .. |
|---|
| 718 | 777 | return nlink; |
|---|
| 719 | 778 | |
|---|
| 720 | 779 | fail: |
|---|
| 721 | | - pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", |
|---|
| 780 | + pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", |
|---|
| 722 | 781 | upperdentry, err); |
|---|
| 723 | 782 | return fallback; |
|---|
| 724 | 783 | } |
|---|
| .. | .. |
|---|
| 729 | 788 | |
|---|
| 730 | 789 | inode = new_inode(sb); |
|---|
| 731 | 790 | if (inode) |
|---|
| 732 | | - ovl_fill_inode(inode, mode, rdev, 0, 0); |
|---|
| 791 | + ovl_fill_inode(inode, mode, rdev); |
|---|
| 733 | 792 | |
|---|
| 734 | 793 | return inode; |
|---|
| 735 | 794 | } |
|---|
| .. | .. |
|---|
| 853 | 912 | * Does overlay inode need to be hashed by lower inode? |
|---|
| 854 | 913 | */ |
|---|
| 855 | 914 | static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, |
|---|
| 856 | | - struct dentry *lower, struct dentry *index) |
|---|
| 915 | + struct dentry *lower, bool index) |
|---|
| 857 | 916 | { |
|---|
| 858 | 917 | struct ovl_fs *ofs = sb->s_fs_info; |
|---|
| 859 | 918 | |
|---|
| .. | .. |
|---|
| 866 | 925 | return true; |
|---|
| 867 | 926 | |
|---|
| 868 | 927 | /* Yes, if won't be copied up */ |
|---|
| 869 | | - if (!ofs->upper_mnt) |
|---|
| 928 | + if (!ovl_upper_mnt(ofs)) |
|---|
| 870 | 929 | return true; |
|---|
| 871 | 930 | |
|---|
| 872 | 931 | /* No, if lower hardlink is or will be broken on copy up */ |
|---|
| .. | .. |
|---|
| 894 | 953 | struct inode *ovl_get_inode(struct super_block *sb, |
|---|
| 895 | 954 | struct ovl_inode_params *oip) |
|---|
| 896 | 955 | { |
|---|
| 956 | + struct ovl_fs *ofs = OVL_FS(sb); |
|---|
| 897 | 957 | struct dentry *upperdentry = oip->upperdentry; |
|---|
| 898 | 958 | struct ovl_path *lowerpath = oip->lowerpath; |
|---|
| 899 | 959 | struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; |
|---|
| .. | .. |
|---|
| 902 | 962 | bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, |
|---|
| 903 | 963 | oip->index); |
|---|
| 904 | 964 | int fsid = bylower ? lowerpath->layer->fsid : 0; |
|---|
| 905 | | - bool is_dir, metacopy = false; |
|---|
| 965 | + bool is_dir; |
|---|
| 906 | 966 | unsigned long ino = 0; |
|---|
| 907 | 967 | int err = oip->newinode ? -EEXIST : -ENOMEM; |
|---|
| 908 | 968 | |
|---|
| .. | .. |
|---|
| 941 | 1001 | |
|---|
| 942 | 1002 | /* Recalculate nlink for non-dir due to indexing */ |
|---|
| 943 | 1003 | if (!is_dir) |
|---|
| 944 | | - nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); |
|---|
| 1004 | + nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry, |
|---|
| 1005 | + nlink); |
|---|
| 945 | 1006 | set_nlink(inode, nlink); |
|---|
| 946 | 1007 | ino = key->i_ino; |
|---|
| 947 | 1008 | } else { |
|---|
| .. | .. |
|---|
| 954 | 1015 | ino = realinode->i_ino; |
|---|
| 955 | 1016 | fsid = lowerpath->layer->fsid; |
|---|
| 956 | 1017 | } |
|---|
| 957 | | - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); |
|---|
| 958 | | - ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); |
|---|
| 1018 | + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); |
|---|
| 1019 | + ovl_inode_init(inode, oip, ino, fsid); |
|---|
| 959 | 1020 | |
|---|
| 960 | | - if (upperdentry && ovl_is_impuredir(upperdentry)) |
|---|
| 1021 | + if (upperdentry && ovl_is_impuredir(sb, upperdentry)) |
|---|
| 961 | 1022 | ovl_set_flag(OVL_IMPURE, inode); |
|---|
| 962 | 1023 | |
|---|
| 963 | 1024 | if (oip->index) |
|---|
| 964 | 1025 | ovl_set_flag(OVL_INDEX, inode); |
|---|
| 965 | | - |
|---|
| 966 | | - if (upperdentry) { |
|---|
| 967 | | - err = ovl_check_metacopy_xattr(upperdentry); |
|---|
| 968 | | - if (err < 0) |
|---|
| 969 | | - goto out_err; |
|---|
| 970 | | - metacopy = err; |
|---|
| 971 | | - if (!metacopy) |
|---|
| 972 | | - ovl_set_flag(OVL_UPPERDATA, inode); |
|---|
| 973 | | - } |
|---|
| 974 | 1026 | |
|---|
| 975 | 1027 | OVL_I(inode)->redirect = oip->redirect; |
|---|
| 976 | 1028 | |
|---|
| .. | .. |
|---|
| 980 | 1032 | /* Check for non-merge dir that may have whiteouts */ |
|---|
| 981 | 1033 | if (is_dir) { |
|---|
| 982 | 1034 | if (((upperdentry && lowerdentry) || oip->numlower > 1) || |
|---|
| 983 | | - ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { |
|---|
| 1035 | + ovl_check_origin_xattr(ofs, upperdentry ?: lowerdentry)) { |
|---|
| 984 | 1036 | ovl_set_flag(OVL_WHITEOUTS, inode); |
|---|
| 985 | 1037 | } |
|---|
| 986 | 1038 | } |
|---|
| .. | .. |
|---|
| 991 | 1043 | return inode; |
|---|
| 992 | 1044 | |
|---|
| 993 | 1045 | out_err: |
|---|
| 994 | | - pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); |
|---|
| 1046 | + pr_warn_ratelimited("failed to get inode (%i)\n", err); |
|---|
| 995 | 1047 | inode = ERR_PTR(err); |
|---|
| 996 | 1048 | goto out; |
|---|
| 997 | 1049 | } |
|---|