2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/locks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/fs/locks.c
  *
@@ -11,11 +12,11 @@
  *
  * Miscellaneous edits, and a total rewrite of posix_lock_file() code.
  * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
- *
+ *
  * Converted file_lock_table to a linked list from an array, which eliminates
  * the limits on how many active file locks are open.
  * Chad Page (pageone@netcom.com), November 27, 1994
- *
+ *
  * Removed dependency on file descriptors. dup()'ed file descriptors now
  * get the same locks as the original file descriptors, and a close() on
  * any file descriptor removes ALL the locks on the file for the current
@@ -41,7 +42,7 @@
  * with a file pointer (filp). As a result they can be shared by a parent
  * process and its children after a fork(). They are removed when the last
  * file descriptor referring to the file pointer is closed (unless explicitly
- * unlocked).
+ * unlocked).
  *
  * FL_FLOCK locks never deadlock, an existing lock is always removed before
  * upgrading from shared to exclusive (or vice versa). When this happens
@@ -50,7 +51,7 @@
  * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
  *
  * Removed some race conditions in flock_lock_file(), marked other possible
- * races. Just grep for FIXME to see them.
+ * races. Just grep for FIXME to see them.
  * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
  *
  * Addressed Dmitry's concerns. Deadlock checking no longer recursive.
@@ -60,7 +61,7 @@
  *
  * Initial implementation of mandatory locks. SunOS turned out to be
  * a rotten model, so I implemented the "obvious" semantics.
- * See 'Documentation/filesystems/mandatory-locking.txt' for details.
+ * See 'Documentation/filesystems/mandatory-locking.rst' for details.
  * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
  *
  * Don't allow mandatory locks on mmap()'ed files. Added simple functions to
@@ -112,6 +113,46 @@
  * Leases and LOCK_MAND
  * Matthew Wilcox <willy@debian.org>, June, 2000.
  * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
+ *
+ * Locking conflicts and dependencies:
+ * If multiple threads attempt to lock the same byte (or flock the same file)
+ * only one can be granted the lock, and the others must wait their turn.
+ * The first lock has been "applied" or "granted", the others are "waiting"
+ * and are "blocked" by the "applied" lock.
+ *
+ * Waiting and applied locks are all kept in trees whose properties are:
+ *
+ *	- the root of a tree may be an applied or waiting lock.
+ *	- every other node in the tree is a waiting lock that
+ *	  conflicts with every ancestor of that node.
+ *
+ * Every such tree begins life as a waiting singleton which obviously
+ * satisfies the above properties.
+ *
+ * The only ways we modify trees preserve these properties:
+ *
+ *	1. We may add a new leaf node, but only after first verifying that it
+ *	   conflicts with all of its ancestors.
+ *	2. We may remove the root of a tree, creating a new singleton
+ *	   tree from the root and N new trees rooted in the immediate
+ *	   children.
+ *	3. If the root of a tree is not currently an applied lock, we may
+ *	   apply it (if possible).
+ *	4. We may upgrade the root of the tree (either extend its range,
+ *	   or upgrade its entire range from read to write).
+ *
+ * When an applied lock is modified in a way that reduces or downgrades any
+ * part of its range, we remove all its children (2 above). This particularly
+ * happens when a lock is unlocked.
+ *
+ * For each of those child trees we "wake up" the thread which is
+ * waiting for the lock so it can continue handling as follows: if the
+ * root of the tree applies, we do so (3). If it doesn't, it must
+ * conflict with some applied lock. We remove (wake up) all of its children
+ * (2), and add it as a new leaf to the tree rooted in the applied
+ * lock (1). We then repeat the process recursively with those
+ * children.
+ *
  */

 #include <linux/capability.h>
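The applied/waiting model described in the comment block above is directly observable from userspace. A minimal sketch, assuming a scratch file at /tmp/lockdemo (hypothetical path): the parent's byte-range lock is the "applied" lock, and the child's conflicting F_SETLKW request becomes a "waiting" block until the unlock wakes it.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = {
		.l_type = F_WRLCK,	/* exclusive byte-range lock */
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 1,		/* just the first byte */
	};
	int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || fcntl(fd, F_SETLK, &fl) < 0) {	/* lock is "applied" */
		perror("lock");
		exit(1);
	}
	if (fork() == 0) {
		/* Child is a different owner, so this conflicts and waits. */
		fcntl(fd, F_SETLKW, &fl);
		printf("child: lock granted\n");
		exit(0);
	}
	sleep(1);			/* child is now "blocked" behind us */
	fl.l_type = F_UNLCK;
	fcntl(fd, F_SETLK, &fl);	/* removing the root wakes the waiter */
	wait(NULL);
	return 0;
}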
@@ -171,6 +212,7 @@
 static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);

+
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
  * It is protected by blocked_lock_lock.
@@ -189,9 +231,9 @@
  * This lock protects the blocked_hash. Generally, if you're accessing it, you
  * want to be holding this lock.
  *
- * In addition, it also protects the fl->fl_block list, and the fl->fl_next
- * pointer for file_lock structures that are acting as lock requests (in
- * contrast to those that are acting as records of acquired locks).
+ * In addition, it also protects the fl->fl_blocked_requests list, and the
+ * fl->fl_blocker pointer for file_lock structures that are acting as lock
+ * requests (in contrast to those that are acting as records of acquired locks).
  *
  * Note that when we acquire this lock in order to change the above fields,
  * we often hold the flc_lock as well. In certain cases, when reading the fields
@@ -293,7 +335,8 @@
 {
	INIT_HLIST_NODE(&fl->fl_link);
	INIT_LIST_HEAD(&fl->fl_list);
-	INIT_LIST_HEAD(&fl->fl_block);
+	INIT_LIST_HEAD(&fl->fl_blocked_requests);
+	INIT_LIST_HEAD(&fl->fl_blocked_member);
	init_waitqueue_head(&fl->fl_wait);
 }

@@ -311,6 +354,12 @@

 void locks_release_private(struct file_lock *fl)
 {
+	BUG_ON(waitqueue_active(&fl->fl_wait));
+	BUG_ON(!list_empty(&fl->fl_list));
+	BUG_ON(!list_empty(&fl->fl_blocked_requests));
+	BUG_ON(!list_empty(&fl->fl_blocked_member));
+	BUG_ON(!hlist_unhashed(&fl->fl_link));
+
	if (fl->fl_ops) {
		if (fl->fl_ops->fl_release_private)
			fl->fl_ops->fl_release_private(fl);
@@ -330,11 +379,6 @@
 /* Free a lock which is not in use. */
 void locks_free_lock(struct file_lock *fl)
 {
-	BUG_ON(waitqueue_active(&fl->fl_wait));
-	BUG_ON(!list_empty(&fl->fl_list));
-	BUG_ON(!list_empty(&fl->fl_block));
-	BUG_ON(!hlist_unhashed(&fl->fl_link));
-
	locks_release_private(fl);
	kmem_cache_free(filelock_cache, fl);
 }
@@ -357,7 +401,6 @@
	memset(fl, 0, sizeof(struct file_lock));
	locks_init_lock_heads(fl);
 }
-
 EXPORT_SYMBOL(locks_init_lock);

 /*
@@ -397,8 +440,25 @@
			fl->fl_ops->fl_copy_lock(new, fl);
	}
 }
-
 EXPORT_SYMBOL(locks_copy_lock);
+
+static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
+{
+	struct file_lock *f;
+
+	/*
+	 * As ctx->flc_lock is held, new requests cannot be added to
+	 * ->fl_blocked_requests, so we don't need a lock to check if it
+	 * is empty.
+	 */
+	if (list_empty(&fl->fl_blocked_requests))
+		return;
+	spin_lock(&blocked_lock_lock);
+	list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
+	list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
+		f->fl_blocker = new;
+	spin_unlock(&blocked_lock_lock);
+}

 static inline int flock_translate_cmd(int cmd) {
	if (cmd & LOCK_MAND)
@@ -416,17 +476,20 @@

 /* Fill in a file_lock structure with an appropriate FLOCK lock. */
 static struct file_lock *
-flock_make_lock(struct file *filp, unsigned int cmd)
+flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
 {
-	struct file_lock *fl;
	int type = flock_translate_cmd(cmd);

	if (type < 0)
		return ERR_PTR(type);
-
-	fl = locks_alloc_lock();
-	if (fl == NULL)
-		return ERR_PTR(-ENOMEM);
+
+	if (fl == NULL) {
+		fl = locks_alloc_lock();
+		if (fl == NULL)
+			return ERR_PTR(-ENOMEM);
+	} else {
+		locks_init_lock(fl);
+	}

	fl->fl_file = filp;
	fl->fl_owner = filp;
@@ -434,7 +497,7 @@
	fl->fl_flags = FL_FLOCK;
	fl->fl_type = type;
	fl->fl_end = OFFSET_MAX;
-
+
	return fl;
 }

@@ -596,9 +659,6 @@
  */
 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 {
-	if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
-		return fl2->fl_lmops == fl1->fl_lmops &&
-			fl1->fl_lmops->lm_compare_owner(fl1, fl2);
	return fl1->fl_owner == fl2->fl_owner;
 }

@@ -639,8 +699,6 @@
 static unsigned long
 posix_owner_key(struct file_lock *fl)
 {
-	if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
-		return fl->fl_lmops->lm_owner_key(fl);
	return (unsigned long)fl->fl_owner;
 }

@@ -666,16 +724,81 @@
 static void __locks_delete_block(struct file_lock *waiter)
 {
	locks_delete_global_blocked(waiter);
-	list_del_init(&waiter->fl_block);
-	waiter->fl_next = NULL;
+	list_del_init(&waiter->fl_blocked_member);
 }

-static void locks_delete_block(struct file_lock *waiter)
+static void __locks_wake_up_blocks(struct file_lock *blocker)
 {
-	spin_lock(&blocked_lock_lock);
-	__locks_delete_block(waiter);
-	spin_unlock(&blocked_lock_lock);
+	while (!list_empty(&blocker->fl_blocked_requests)) {
+		struct file_lock *waiter;
+
+		waiter = list_first_entry(&blocker->fl_blocked_requests,
+					  struct file_lock, fl_blocked_member);
+		__locks_delete_block(waiter);
+		if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
+			waiter->fl_lmops->lm_notify(waiter);
+		else
+			wake_up(&waiter->fl_wait);
+
+		/*
+		 * The setting of fl_blocker to NULL marks the "done"
+		 * point in deleting a block. Paired with acquire at the top
+		 * of locks_delete_block().
+		 */
+		smp_store_release(&waiter->fl_blocker, NULL);
+	}
 }
+
+/**
+ * locks_delete_block - stop waiting for a file lock
+ * @waiter: the lock which was waiting
+ *
+ * lockd/nfsd need to disconnect the lock while working on it.
+ */
+int locks_delete_block(struct file_lock *waiter)
+{
+	int status = -ENOENT;
+
+	/*
+	 * If fl_blocker is NULL, it won't be set again as this thread "owns"
+	 * the lock and is the only one that might try to claim the lock.
+	 *
+	 * We use acquire/release to manage fl_blocker so that we can
+	 * optimize away taking the blocked_lock_lock in many cases.
+	 *
+	 * The smp_load_acquire guarantees two things:
+	 *
+	 * 1/ that fl_blocked_requests can be tested locklessly. If something
+	 * was recently added to that list it must have been in a locked region
+	 * *before* the locked region when fl_blocker was set to NULL.
+	 *
+	 * 2/ that no other thread is accessing 'waiter', so it is safe to free
+	 * it. __locks_wake_up_blocks is careful not to touch waiter after
+	 * fl_blocker is released.
+	 *
+	 * If a lockless check of fl_blocker shows it to be NULL, we know that
+	 * no new locks can be inserted into its fl_blocked_requests list, and
+	 * can avoid doing anything further if the list is empty.
+	 */
+	if (!smp_load_acquire(&waiter->fl_blocker) &&
+	    list_empty(&waiter->fl_blocked_requests))
+		return status;
+
+	spin_lock(&blocked_lock_lock);
+	if (waiter->fl_blocker)
+		status = 0;
+	__locks_wake_up_blocks(waiter);
+	__locks_delete_block(waiter);
+
+	/*
+	 * The setting of fl_blocker to NULL marks the "done" point in deleting
+	 * a block. Paired with acquire at the top of this function.
+	 */
+	smp_store_release(&waiter->fl_blocker, NULL);
+	spin_unlock(&blocked_lock_lock);
+	return status;
+}
+EXPORT_SYMBOL(locks_delete_block);
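The lockless fast path above depends on the release/acquire pairing between smp_store_release() in the waker and smp_load_acquire() here. A standalone sketch of the same handshake in C11 atomics (illustrative only; the type and function names are invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct waiter {
	_Atomic(void *) blocker;	/* non-NULL while queued behind a lock */
};

/* Waker side: unlink the waiter under the shared lock, then publish
 * completion with a release store and never touch *w again. */
static void finish_wakeup(struct waiter *w)
{
	atomic_store_explicit(&w->blocker, NULL, memory_order_release);
}

/* Request owner's side: the acquire load pairs with the release store above.
 * Reading NULL proves every write the waker made beforehand is visible, so
 * the waiter can be torn down without taking the shared lock at all. */
static bool safely_detached(struct waiter *w)
{
	return atomic_load_explicit(&w->blocker, memory_order_acquire) == NULL;
}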

 /* Insert waiter into blocker's block list.
  * We use a circular list so that processes can be easily woken up in
@@ -683,26 +806,49 @@
  * it seems like the reasonable thing to do.
  *
  * Must be called with both the flc_lock and blocked_lock_lock held. The
- * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
- * that the flc_lock is also held on insertions we can avoid taking the
- * blocked_lock_lock in some cases when we see that the fl_block list is empty.
+ * fl_blocked_requests list itself is protected by the blocked_lock_lock,
+ * but by ensuring that the flc_lock is also held on insertions we can avoid
+ * taking the blocked_lock_lock in some cases when we see that the
+ * fl_blocked_requests list is empty.
+ *
+ * Rather than just adding to the list, we check for conflicts with any existing
+ * waiters, and add beneath any waiter that blocks the new waiter.
+ * Thus wakeups don't happen until needed.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-				 struct file_lock *waiter)
+				 struct file_lock *waiter,
+				 bool conflict(struct file_lock *,
+					       struct file_lock *))
 {
-	BUG_ON(!list_empty(&waiter->fl_block));
-	waiter->fl_next = blocker;
-	list_add_tail(&waiter->fl_block, &blocker->fl_block);
+	struct file_lock *fl;
+	BUG_ON(!list_empty(&waiter->fl_blocked_member));
+
+new_blocker:
+	list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
+		if (conflict(fl, waiter)) {
+			blocker = fl;
+			goto new_blocker;
+		}
+	waiter->fl_blocker = blocker;
+	list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
		locks_insert_global_blocked(waiter);
+
+	/* The requests in waiter->fl_blocked are known to conflict with
+	 * waiter, but might not conflict with blocker, or the requests
+	 * and lock which block it. So they all need to be woken.
+	 */
+	__locks_wake_up_blocks(waiter);
 }

 /* Must be called with flc_lock held. */
 static void locks_insert_block(struct file_lock *blocker,
-			       struct file_lock *waiter)
+			       struct file_lock *waiter,
+			       bool conflict(struct file_lock *,
+					     struct file_lock *))
 {
	spin_lock(&blocked_lock_lock);
-	__locks_insert_block(blocker, waiter);
+	__locks_insert_block(blocker, waiter, conflict);
	spin_unlock(&blocked_lock_lock);
 }

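The insertion above queues a waiter beneath the deepest already-queued request that conflicts with it, so waking a root does not spuriously wake requests that would still be blocked. A simplified, self-contained restatement of that walk, using a hypothetical request type in place of struct file_lock:

#include <stdbool.h>

struct request {
	int start, end;			/* byte range, inclusive */
	struct request *blocker;	/* request we wait behind */
	struct request *children;	/* requests blocked on us */
	struct request *next_sibling;
};

static bool ranges_conflict(const struct request *a, const struct request *b)
{
	return a->start <= b->end && b->start <= a->end;
}

static void insert_blocked(struct request *blocker, struct request *waiter)
{
	struct request *fl;

new_blocker:
	for (fl = blocker->children; fl; fl = fl->next_sibling)
		if (ranges_conflict(fl, waiter)) {
			blocker = fl;	/* descend beneath the nearer conflict */
			goto new_blocker;
		}
	waiter->blocker = blocker;	/* attach as a new leaf */
	waiter->next_sibling = blocker->children;
	blocker->children = waiter;
}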
@@ -716,25 +862,15 @@
	/*
	 * Avoid taking global lock if list is empty. This is safe since new
	 * blocked requests are only added to the list under the flc_lock, and
-	 * the flc_lock is always held here. Note that removal from the fl_block
-	 * list does not require the flc_lock, so we must recheck list_empty()
-	 * after acquiring the blocked_lock_lock.
+	 * the flc_lock is always held here. Note that removal from the
+	 * fl_blocked_requests list does not require the flc_lock, so we must
+	 * recheck list_empty() after acquiring the blocked_lock_lock.
	 */
-	if (list_empty(&blocker->fl_block))
+	if (list_empty(&blocker->fl_blocked_requests))
		return;

	spin_lock(&blocked_lock_lock);
-	while (!list_empty(&blocker->fl_block)) {
-		struct file_lock *waiter;
-
-		waiter = list_first_entry(&blocker->fl_block,
-					  struct file_lock, fl_block);
-		__locks_delete_block(waiter);
-		if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
-			waiter->fl_lmops->lm_notify(waiter);
-		else
-			wake_up(&waiter->fl_wait);
-	}
+	__locks_wake_up_blocks(blocker);
	spin_unlock(&blocked_lock_lock);
 }

@@ -766,47 +902,50 @@
 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
  * checks for shared/exclusive status of overlapping locks.
  */
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool locks_conflict(struct file_lock *caller_fl,
+			   struct file_lock *sys_fl)
 {
	if (sys_fl->fl_type == F_WRLCK)
-		return 1;
+		return true;
	if (caller_fl->fl_type == F_WRLCK)
-		return 1;
-	return 0;
+		return true;
+	return false;
 }

 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
  * checking before calling the locks_conflict().
  */
-static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool posix_locks_conflict(struct file_lock *caller_fl,
+				 struct file_lock *sys_fl)
 {
	/* POSIX locks owned by the same process do not conflict with
	 * each other.
	 */
	if (posix_same_owner(caller_fl, sys_fl))
-		return (0);
+		return false;

	/* Check whether they overlap */
	if (!locks_overlap(caller_fl, sys_fl))
-		return 0;
+		return false;

-	return (locks_conflict(caller_fl, sys_fl));
+	return locks_conflict(caller_fl, sys_fl);
 }

 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static bool flock_locks_conflict(struct file_lock *caller_fl,
+				 struct file_lock *sys_fl)
 {
	/* FLOCK locks referring to the same filp do not conflict with
	 * each other.
	 */
	if (caller_fl->fl_file == sys_fl->fl_file)
-		return (0);
+		return false;
	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
-		return 0;
+		return false;

-	return (locks_conflict(caller_fl, sys_fl));
+	return locks_conflict(caller_fl, sys_fl);
 }
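Stripped of the ownership and overlap pre-checks, the shared/exclusive rule above is simply "two read locks never conflict; any pairing that includes a write lock does". A trivial standalone check (hypothetical code):

#include <assert.h>
#include <stdbool.h>

enum lock_type { RD, WR };

static bool conflicts(enum lock_type a, enum lock_type b)
{
	return a == WR || b == WR;
}

int main(void)
{
	assert(!conflicts(RD, RD));	/* shared + shared: compatible */
	assert(conflicts(RD, WR));	/* shared + exclusive: conflict */
	assert(conflicts(WR, WR));	/* exclusive + exclusive: conflict */
	return 0;
}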

 void
@@ -877,8 +1016,11 @@
	struct file_lock *fl;

	hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
-		if (posix_same_owner(fl, block_fl))
-			return fl->fl_next;
+		if (posix_same_owner(fl, block_fl)) {
+			while (fl->fl_blocker)
+				fl = fl->fl_blocker;
+			return fl;
+		}
	}
	return NULL;
 }
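Chasing fl_blocker to the final blocker is what lets the deadlock detector, which starts from the function above, discover circular waits. A hedged userspace sketch of the ABBA cycle it catches — /tmp/deadlock-demo is a hypothetical path, and the sleep()-based ordering is deliberately simplified:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static int lock_byte(int fd, off_t off, int cmd)
{
	struct flock fl = {
		.l_type = F_WRLCK, .l_whence = SEEK_SET,
		.l_start = off, .l_len = 1,
	};
	return fcntl(fd, cmd, &fl);
}

int main(void)
{
	int fd = open("/tmp/deadlock-demo", O_RDWR | O_CREAT, 0644);

	if (fork() == 0) {		/* child: holds byte 1, wants byte 0 */
		lock_byte(fd, 1, F_SETLK);
		sleep(1);
		if (lock_byte(fd, 0, F_SETLKW) < 0 && errno == EDEADLK)
			printf("child: EDEADLK\n");
		return 0;
	}
	lock_byte(fd, 0, F_SETLK);	/* parent: holds byte 0, wants byte 1 */
	sleep(1);
	if (lock_byte(fd, 1, F_SETLKW) < 0 && errno == EDEADLK)
		printf("parent: EDEADLK\n");
	wait(NULL);
	return 0;
}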
@@ -965,12 +1107,13 @@
		if (!(request->fl_flags & FL_SLEEP))
			goto out;
		error = FILE_LOCK_DEFERRED;
-		locks_insert_block(fl, request);
+		locks_insert_block(fl, request, flock_locks_conflict);
		goto out;
	}
	if (request->fl_flags & FL_ACCESS)
		goto out;
	locks_copy_lock(new_fl, request);
+	locks_move_blocks(new_fl, request);
	locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
	new_fl = NULL;
	error = 0;
@@ -1037,14 +1180,20 @@
			 */
			error = -EDEADLK;
			spin_lock(&blocked_lock_lock);
+			/*
+			 * Ensure that we don't find any locks blocked on this
+			 * request during deadlock detection.
+			 */
+			__locks_wake_up_blocks(request);
			if (likely(!posix_locks_deadlock(request, fl))) {
				error = FILE_LOCK_DEFERRED;
-				__locks_insert_block(fl, request);
+				__locks_insert_block(fl, request,
+						     posix_locks_conflict);
			}
			spin_unlock(&blocked_lock_lock);
			goto out;
-		}
-	}
+		}
+	}

	/* If we're just looking for a conflict, we're done. */
	error = 0;
@@ -1133,6 +1282,7 @@
			if (!new_fl)
				goto out;
			locks_copy_lock(new_fl, request);
+			locks_move_blocks(new_fl, request);
			request = new_fl;
			new_fl = NULL;
			locks_insert_lock_ctx(request, &fl->fl_list);
@@ -1164,6 +1314,7 @@
			goto out;
		}
		locks_copy_lock(new_fl, request);
+		locks_move_blocks(new_fl, request);
		locks_insert_lock_ctx(new_fl, &fl->fl_list);
		fl = new_fl;
		new_fl = NULL;
@@ -1188,6 +1339,7 @@
 out:
	spin_unlock(&ctx->flc_lock);
	percpu_up_read(&file_rwsem);
+	trace_posix_lock_inode(inode, request, error);
	/*
	 * Free any unused locks.
	 */
@@ -1196,7 +1348,6 @@
	if (new_fl2)
		locks_free_lock(new_fl2);
	locks_dispose_list(&dispose);
-	trace_posix_lock_inode(inode, request, error);

	return error;
 }
@@ -1237,13 +1388,12 @@
		error = posix_lock_inode(inode, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
-		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
-		if (!error)
-			continue;
-
-		locks_delete_block(fl);
-		break;
+		error = wait_event_interruptible(fl->fl_wait,
					list_empty(&fl->fl_blocked_member));
+		if (error)
+			break;
	}
+	locks_delete_block(fl);
	return error;
 }

@@ -1324,7 +1474,8 @@
		error = posix_lock_inode(inode, &fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
-		error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
+		error = wait_event_interruptible(fl.fl_wait,
					list_empty(&fl.fl_blocked_member));
		if (!error) {
			/*
			 * If we've been sleeping someone might have
@@ -1334,13 +1485,12 @@
			continue;
		}

-		locks_delete_block(&fl);
		break;
	}
+	locks_delete_block(&fl);

	return error;
 }
-
 EXPORT_SYMBOL(locks_mandatory_area);
 #endif /* CONFIG_MANDATORY_FILE_LOCKING */
@@ -1349,7 +1499,7 @@
	switch (arg) {
	case F_UNLCK:
		fl->fl_flags &= ~FL_UNLOCK_PENDING;
-		/* fall through: */
+		fallthrough;
	case F_RDLCK:
		fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
	}
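The fallthrough; statement replaces the old comment convention with something the compiler can verify under -Wimplicit-fallthrough. Roughly — simplified from the definition in include/linux/compiler_attributes.h, which is gated on compiler support:

#if defined(__has_attribute) && __has_attribute(__fallthrough__)
# define fallthrough	__attribute__((__fallthrough__))
#else
# define fallthrough	do {} while (0)	/* fallthrough */
#endif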
@@ -1406,11 +1556,24 @@

 static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
 {
-	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
+	bool rc;
+
+	if (lease->fl_lmops->lm_breaker_owns_lease
+			&& lease->fl_lmops->lm_breaker_owns_lease(lease))
		return false;
-	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
-		return false;
-	return locks_conflict(breaker, lease);
+	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+		rc = false;
+		goto trace;
+	}
+	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+		rc = false;
+		goto trace;
+	}
+
+	rc = locks_conflict(breaker, lease);
+trace:
+	trace_leases_conflict(rc, lease, breaker);
+	return rc;
 }
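For context, the semantics being traced here are driven from userspace through F_SETLEASE. A minimal lease-holder sketch (hypothetical path, error handling trimmed): the holder receives SIGIO when another open() conflicts and must release the lease within /proc/sys/fs/lease-break-time seconds — the break_time machinery in __break_lease() below.

#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t lease_broken;

static void on_sigio(int sig)
{
	(void)sig;
	lease_broken = 1;
}

int main(void)
{
	int fd = open("/tmp/lease-demo", O_RDONLY | O_CREAT, 0644);

	signal(SIGIO, on_sigio);
	if (fcntl(fd, F_SETLEASE, F_RDLCK) < 0) {	/* take a read lease */
		perror("F_SETLEASE");
		return 1;
	}
	pause();				/* until a conflicting open */
	if (lease_broken)
		fcntl(fd, F_SETLEASE, F_UNLCK);	/* release within break time */
	return 0;
}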

 static bool
@@ -1459,7 +1622,7 @@
	ctx = smp_load_acquire(&inode->i_flctx);
	if (!ctx) {
		WARN_ON_ONCE(1);
-		return error;
+		goto free_lock;
	}

	percpu_down_read(&file_rwsem);
@@ -1511,14 +1674,15 @@
		break_time -= jiffies;
	if (break_time == 0)
		break_time++;
-	locks_insert_block(fl, new_fl);
+	locks_insert_block(fl, new_fl, leases_conflict);
	trace_break_lease_block(inode, new_fl);
	spin_unlock(&ctx->flc_lock);
	percpu_up_read(&file_rwsem);

	locks_dispose_list(&dispose);
	error = wait_event_interruptible_timeout(new_fl->fl_wait,
-						!new_fl->fl_next, break_time);
+					list_empty(&new_fl->fl_blocked_member),
+					break_time);

	percpu_down_read(&file_rwsem);
	spin_lock(&ctx->flc_lock);
@@ -1539,10 +1703,10 @@
	spin_unlock(&ctx->flc_lock);
	percpu_up_read(&file_rwsem);
	locks_dispose_list(&dispose);
+free_lock:
	locks_free_lock(new_fl);
	return error;
 }
-
 EXPORT_SYMBOL(__break_lease);
@@ -1573,7 +1737,6 @@
	if (has_lease)
		*time = current_time(inode);
 }
-
 EXPORT_SYMBOL(lease_get_mtime);

 /**
@@ -1627,10 +1790,10 @@
 }

 /**
- * check_conflicting_open - see if the given dentry points to a file that has
- *			    an existing open that would conflict with the
- *			    desired lease.
- * @dentry:	dentry to check
+ * check_conflicting_open - see if the given file points to an inode that has
+ *			    an existing open that would conflict with the
+ *			    desired lease.
+ * @filp:	file to check
  * @arg:	type of lease that we're trying to acquire
  * @flags:	current lock flags
  *
@@ -1638,30 +1801,42 @@
  * conflict with the lease we're trying to set.
  */
 static int
-check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
+check_conflicting_open(struct file *filp, const long arg, int flags)
 {
-	int ret = 0;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = locks_inode(filp);
+	int self_wcount = 0, self_rcount = 0;

	if (flags & FL_LAYOUT)
		return 0;

-	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+	if (arg == F_RDLCK)
+		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+	else if (arg != F_WRLCK)
+		return 0;
+
+	/*
+	 * Make sure that only read/write count is from lease requestor.
+	 * Note that this will result in denying write leases when i_writecount
+	 * is negative, which is what we want. (We shouldn't grant write leases
+	 * on files open for execution.)
+	 */
+	if (filp->f_mode & FMODE_WRITE)
+		self_wcount = 1;
+	else if (filp->f_mode & FMODE_READ)
+		self_rcount = 1;
+
+	if (atomic_read(&inode->i_writecount) != self_wcount ||
+	    atomic_read(&inode->i_readcount) != self_rcount)
		return -EAGAIN;

-	if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
-	    (atomic_read(&inode->i_count) > 1)))
-		ret = -EAGAIN;
-
-	return ret;
+	return 0;
 }

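A compact restatement of the self-count rule with illustrative values (hypothetical standalone code; it assumes the requesting descriptor was opened for exactly one of read or write):

#include <assert.h>
#include <stdbool.h>

/* Grant a write lease only when the requestor's own open accounts for every
 * opener of the inode. A negative writer count (file being executed) can
 * never match, which is exactly the behaviour the comment above wants. */
static bool write_lease_ok(bool opened_for_write, int writers, int readers)
{
	int self_w = opened_for_write ? 1 : 0;
	int self_r = opened_for_write ? 0 : 1;

	return writers == self_w && readers == self_r;
}

int main(void)
{
	assert(write_lease_ok(true, 1, 0));	/* only our writable open */
	assert(!write_lease_ok(true, 2, 0));	/* another writer exists */
	assert(!write_lease_ok(true, -1, 0));	/* file is being executed */
	return 0;
}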
 static int
 generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
 {
	struct file_lock *fl, *my_fl = NULL, *lease;
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = locks_inode(filp);
	struct file_lock_context *ctx;
	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
	int error;
@@ -1696,7 +1871,7 @@
	percpu_down_read(&file_rwsem);
	spin_lock(&ctx->flc_lock);
	time_out_leases(inode, &dispose);
-	error = check_conflicting_open(dentry, arg, lease->fl_flags);
+	error = check_conflicting_open(filp, arg, lease->fl_flags);
	if (error)
		goto out;
@@ -1753,7 +1928,7 @@
	 * precedes these checks.
	 */
	smp_mb();
-	error = check_conflicting_open(dentry, arg, lease->fl_flags);
+	error = check_conflicting_open(filp, arg, lease->fl_flags);
	if (error) {
		locks_unlink_lock_ctx(lease);
		goto out;
@@ -1847,13 +2022,71 @@
 }
 EXPORT_SYMBOL(generic_setlease);

+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+	srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+	if (arg != F_UNLCK)
+		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+	srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
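A hedged sketch of how a subsystem might consume this API (hypothetical module code; the callback signature is the standard struct notifier_block one, and arg carries the F_SETLEASE argument):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int my_lease_cb(struct notifier_block *nb, unsigned long arg, void *data)
{
	struct file_lock *lease = data;

	/* Close or flush any cached state that would conflict here. */
	pr_info("lease attempt, type %lu on %p\n", arg, lease->fl_file);
	return NOTIFY_OK;
}

static struct notifier_block my_lease_nb = {
	.notifier_call = my_lease_cb,
};

static int __init my_init(void)
{
	return lease_register_notifier(&my_lease_nb);
}

static void __exit my_exit(void)
{
	lease_unregister_notifier(&my_lease_nb);
}
module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");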
 /**
  * vfs_setlease - sets a lease on an open file
  * @filp: file pointer
  * @arg: type of lease to obtain
  * @lease: file_lock to use when adding a lease
  * @priv: private info for lm_setup when adding a lease (may be
- *	  NULL if lm_setup doesn't require it)
+ *	  NULL if lm_setup doesn't require it)
  *
  * Call this to establish a lease on the file. The "lease" argument is not
  * used for F_UNLCK requests and may be NULL. For commands that set or alter
@@ -1867,6 +2100,8 @@
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+	if (lease)
+		setlease_notifier(arg, *lease);
	if (filp->f_op->setlease)
		return filp->f_op->setlease(filp, arg, lease, priv);
	else
@@ -1931,13 +2166,12 @@
		error = flock_lock_inode(inode, fl);
		if (error != FILE_LOCK_DEFERRED)
			break;
-		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
-		if (!error)
-			continue;
-
-		locks_delete_block(fl);
-		break;
+		error = wait_event_interruptible(fl->fl_wait,
				list_empty(&fl->fl_blocked_member));
+		if (error)
+			break;
	}
+	locks_delete_block(fl);
	return error;
 }

@@ -2001,7 +2235,7 @@
	    !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
		goto out_putf;

-	lock = flock_make_lock(f.file, cmd);
+	lock = flock_make_lock(f.file, cmd, NULL);
	if (IS_ERR(lock)) {
		error = PTR_ERR(lock);
		goto out_putf;
@@ -2143,7 +2377,7 @@
	error = vfs_test_lock(filp, fl);
	if (error)
		goto out;
-
+
	flock->l_type = fl->fl_type;
	if (fl->fl_type != F_UNLCK) {
		error = posix_lock_to_flock(flock, fl);
@@ -2210,13 +2444,12 @@
		error = vfs_lock_file(filp, cmd, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			break;
-		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
-		if (!error)
-			continue;
-
-		locks_delete_block(fl);
-		break;
+		error = wait_event_interruptible(fl->fl_wait,
				list_empty(&fl->fl_blocked_member));
+		if (error)
+			break;
	}
+	locks_delete_block(fl);

	return error;
 }
@@ -2289,7 +2522,7 @@
		cmd = F_SETLKW;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
-		/* Fallthrough */
+		fallthrough;
	case F_SETLKW:
		file_lock->fl_flags |= FL_SLEEP;
	}
@@ -2420,7 +2653,7 @@
		cmd = F_SETLKW64;
		file_lock->fl_flags |= FL_OFDLCK;
		file_lock->fl_owner = filp;
-		/* Fallthrough */
+		fallthrough;
	case F_SETLKW64:
		file_lock->fl_flags |= FL_SLEEP;
	}
@@ -2476,6 +2709,7 @@
	if (!ctx || list_empty(&ctx->flc_posix))
		return;

+	locks_init_lock(&lock);
	lock.fl_type = F_UNLCK;
	lock.fl_flags = FL_POSIX | FL_CLOSE;
	lock.fl_start = 0;
@@ -2492,25 +2726,20 @@
		lock.fl_ops->fl_release_private(&lock);
	trace_locks_remove_posix(inode, &lock, error);
 }
-
 EXPORT_SYMBOL(locks_remove_posix);

 /* The i_flctx must be valid when calling into here */
 static void
 locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 {
-	struct file_lock fl = {
-		.fl_owner = filp,
-		.fl_pid = current->tgid,
-		.fl_file = filp,
-		.fl_flags = FL_FLOCK | FL_CLOSE,
-		.fl_type = F_UNLCK,
-		.fl_end = OFFSET_MAX,
-	};
+	struct file_lock fl;
	struct inode *inode = locks_inode(filp);

	if (list_empty(&flctx->flc_flock))
		return;
+
+	flock_make_lock(filp, LOCK_UN, &fl);
+	fl.fl_flags |= FL_CLOSE;

	if (filp->f_op->flock)
		filp->f_op->flock(filp, F_SETLKW, &fl);
@@ -2570,27 +2799,6 @@
 }

 /**
- * posix_unblock_lock - stop waiting for a file lock
- * @waiter: the lock which was waiting
- *
- * lockd needs to block waiting for locks.
- */
-int
-posix_unblock_lock(struct file_lock *waiter)
-{
-	int status = 0;
-
-	spin_lock(&blocked_lock_lock);
-	if (waiter->fl_next)
-		__locks_delete_block(waiter);
-	else
-		status = -ENOENT;
-	spin_unlock(&blocked_lock_lock);
-	return status;
-}
-EXPORT_SYMBOL(posix_unblock_lock);
-
-/**
  * vfs_cancel_lock - file byte range unblock lock
  * @filp: The file to apply the unblock to
  * @fl: The lock to be unblocked
@@ -2603,8 +2811,30 @@
		return filp->f_op->lock(filp, F_CANCELLK, fl);
	return 0;
 }
-
 EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+
+/**
+ * vfs_inode_has_locks - are any file locks held on @inode?
+ * @inode: inode to check for locks
+ *
+ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
+ * set on @inode.
+ */
+bool vfs_inode_has_locks(struct inode *inode)
+{
+	struct file_lock_context *ctx;
+	bool ret;
+
+	ctx = smp_load_acquire(&inode->i_flctx);
+	if (!ctx)
+		return false;
+
+	spin_lock(&ctx->flc_lock);
+	ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
+	spin_unlock(&ctx->flc_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfs_inode_has_locks);

26102840 #include <linux/proc_fs.h>
....@@ -2620,7 +2850,7 @@
26202850 {
26212851 struct inode *inode = NULL;
26222852 unsigned int fl_pid;
2623
- struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
2853
+ struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
26242854
26252855 fl_pid = locks_translate_pid(fl, proc_pidns);
26262856 /*
....@@ -2671,10 +2901,10 @@
26712901 ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ "
26722902 : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
26732903 } else {
2674
- seq_printf(f, "%s ",
2675
- (lease_breaking(fl))
2676
- ? (fl->fl_type == F_UNLCK) ? "UNLCK" : "READ "
2677
- : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
2904
+ int type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
2905
+
2906
+ seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
2907
+ (type == F_RDLCK) ? "READ" : "UNLCK");
26782908 }
26792909 if (inode) {
26802910 /* userspace relies on this representation of dev_t */
....@@ -2698,7 +2928,7 @@
26982928 {
26992929 struct locks_iterator *iter = f->private;
27002930 struct file_lock *fl, *bfl;
2701
- struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
2931
+ struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
27022932
27032933 fl = hlist_entry(v, struct file_lock, fl_link);
27042934
....@@ -2707,7 +2937,7 @@
27072937
27082938 lock_get_status(f, fl, iter->li_pos, "");
27092939
2710
- list_for_each_entry(bfl, &fl->fl_block, fl_block)
2940
+ list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member)
27112941 lock_get_status(f, bfl, iter->li_pos, " ->");
27122942
27132943 return 0;
....@@ -2803,7 +3033,6 @@
28033033 filelock_cache = kmem_cache_create("file_lock_cache",
28043034 sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
28053035
2806
-
28073036 for_each_possible_cpu(i) {
28083037 struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
28093038
....@@ -2811,7 +3040,7 @@
28113040 INIT_HLIST_HEAD(&fll->hlist);
28123041 }
28133042
3043
+ lease_notifier_chain_init();
28143044 return 0;
28153045 }
2816
-
28173046 core_initcall(filelock_init);