2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/mbcache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/list.h>
@@ -10,7 +11,7 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -89,12 +90,19 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&entry->e_list);
-	/* One ref for hash, one ref returned */
-	atomic_set(&entry->e_refcnt, 1);
+	/*
+	 * We create the entry with two references. One reference is kept by
+	 * the hash table, the other reference is used to protect us from
+	 * mb_cache_entry_delete_or_get() until the entry is fully set up.
+	 * This avoids nesting cache->c_list_lock inside the hash table bit
+	 * locks, which is problematic for RT.
+	 */
+	atomic_set(&entry->e_refcnt, 2);
 	entry->e_key = key;
 	entry->e_value = value;
-	entry->e_reusable = reusable;
-	entry->e_referenced = 0;
+	entry->e_flags = 0;
+	if (reusable)
+		set_bit(MBE_REUSABLE_B, &entry->e_flags);
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
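
The e_reusable and e_referenced bitfields are folded into a single e_flags word so they can be updated with atomic bitops (set_bit()/clear_bit()). The counterpart definitions live in include/linux/mbcache.h rather than in this file; a sketch of the shape they presumably take (field comments are mine, not from the diff):

	enum {
		/* entry was used recently - consulted by the LRU shrinker */
		MBE_REFERENCED_B = 0,
		/* entry may be handed out to lookups for reuse */
		MBE_REUSABLE_B
	};

	struct mb_cache_entry {
		struct list_head	e_list;		/* LRU list, under c_list_lock */
		struct hlist_bl_node	e_hash_list;	/* hash chain, under bit lock */
		atomic_t		e_refcnt;	/* 1 == hash table ref only */
		u32			e_key;
		unsigned long		e_flags;	/* MBE_* bits, atomic bitops */
		u64			e_value;
	};
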
@@ -106,23 +114,40 @@
 	}
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);
-
 	spin_lock(&cache->c_list_lock);
 	list_add_tail(&entry->e_list, &cache->c_list);
-	/* Grab ref for LRU list */
-	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
 	spin_unlock(&cache->c_list_lock);
+	mb_cache_entry_put(cache, entry);
 
 	return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);
 
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+	struct hlist_bl_head *head;
+
+	head = mb_cache_entry_head(cache, entry->e_key);
+	hlist_bl_lock(head);
+	hlist_bl_del(&entry->e_hash_list);
+	hlist_bl_unlock(head);
 	kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);
+
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
 
 static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 					   struct mb_cache_entry *entry,
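
wait_var_event() only re-checks its condition when someone calls wake_up_var() on the same address, so mb_cache_entry_wait_unused() implies a matching wake-up on the reference-drop path. That path sits in the mbcache header rather than in this file; a minimal sketch of how mb_cache_entry_put() has to behave under this scheme (the threshold is my reading of the "<= 2" wait condition above):

	static inline void mb_cache_entry_put(struct mb_cache *cache,
					      struct mb_cache_entry *entry)
	{
		unsigned int cnt = atomic_dec_return(&entry->e_refcnt);

		if (cnt > 0) {
			/*
			 * A waiter in mb_cache_entry_wait_unused() sleeps until
			 * the count falls to 2 (hash ref + its own ref), so wake
			 * potential waiters once we reach that range.
			 */
			if (cnt <= 2)
				wake_up_var(&entry->e_refcnt);
			return;
		}
		/* Last reference gone: unhash and free the entry. */
		__mb_cache_entry_free(cache, entry);
	}
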
@@ -141,10 +166,10 @@
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key &&
+		    test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 		node = node->next;
 	}
 	entry = NULL;
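
The lookup paths now take references with atomic_inc_not_zero() instead of atomic_inc(). The difference matters because the shrinker and mb_cache_entry_delete_or_get() can drop the count to zero while the entry is still visible on its hash chain; a plain increment could resurrect such a dying entry. A minimal illustration of the idiom (not from the patch):

	/* Safe: only succeeds while somebody still holds a reference. */
	if (atomic_inc_not_zero(&entry->e_refcnt)) {
		/* We now own a reference; the entry cannot be freed under us. */
	} else {
		/* Refcount already hit zero: entry is being torn down, skip it. */
	}
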
@@ -204,10 +229,9 @@
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_value == value) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key && entry->e_value == value &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 	}
 	entry = NULL;
 out:
@@ -216,7 +240,7 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
  * @cache - cache we work with
  * @key - key
  * @value - value
@@ -253,6 +277,43 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_delete);
 
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs a reference to the entry that we failed to delete because it
+ * still has users, and returns it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+						    u32 key, u64 value)
+{
+	struct mb_cache_entry *entry;
+
+	entry = mb_cache_entry_get(cache, key, value);
+	if (!entry)
+		return NULL;
+
+	/*
+	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
+	 * ref if we are the last user.
+	 */
+	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+		return entry;
+
+	spin_lock(&cache->c_list_lock);
+	if (!list_empty(&entry->e_list))
+		list_del_init(&entry->e_list);
+	cache->c_entry_count--;
+	spin_unlock(&cache->c_list_lock);
+	__mb_cache_entry_free(cache, entry);
+	return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
 /* mb_cache_entry_touch - cache entry got used
  * @cache - cache the entry belongs to
  * @entry - entry that got used
@@ -262,7 +323,7 @@
 void mb_cache_entry_touch(struct mb_cache *cache,
 			  struct mb_cache_entry *entry)
 {
-	entry->e_referenced = 1;
+	set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
 
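
Together, mb_cache_entry_delete_or_get() and mb_cache_entry_wait_unused() let a deleter either remove an unused entry outright or wait out its remaining users instead of racing with them. The ext2/ext4 call sites are not part of this diff; a hedged sketch of the intended caller pattern, where example_forget_entry() and its parameters are hypothetical names of mine:

	#include <linux/mbcache.h>

	/* Hypothetical caller, not from the patch: stop the cache from handing
	 * out (key, value) before the underlying block is reused. */
	static void example_forget_entry(struct mb_cache *cache, u32 key, u64 value)
	{
		struct mb_cache_entry *oe;

		oe = mb_cache_entry_delete_or_get(cache, key, value);
		if (oe) {
			/* Entry still has users: sleep until ours is the last
			 * reference besides the hash table's... */
			mb_cache_entry_wait_unused(oe);
			/* ...then drop the ref delete_or_get() handed back. */
			mb_cache_entry_put(cache, oe);
		}
	}
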
@@ -280,34 +341,24 @@
 			  unsigned long nr_to_scan)
 {
 	struct mb_cache_entry *entry;
-	struct hlist_bl_head *head;
 	unsigned long shrunk = 0;
 
 	spin_lock(&cache->c_list_lock);
 	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
-		if (entry->e_referenced) {
-			entry->e_referenced = 0;
+		/* Drop initial hash reference if there is no user */
+		if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+			clear_bit(MBE_REFERENCED_B, &entry->e_flags);
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
 		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
-		/*
-		 * We keep LRU list reference so that entry doesn't go away
-		 * from under us.
-		 */
 		spin_unlock(&cache->c_list_lock);
-		head = mb_cache_entry_head(cache, entry->e_key);
-		hlist_bl_lock(head);
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		}
-		hlist_bl_unlock(head);
-		if (mb_cache_entry_put(cache, entry))
-			shrunk++;
+		__mb_cache_entry_free(cache, entry);
+		shrunk++;
 		cond_resched();
 		spin_lock(&cache->c_list_lock);
 	}
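
The shrinker's atomic_cmpxchg(&entry->e_refcnt, 1, 0) is the reaping counterpart of the cmpxchg(2, 0) in mb_cache_entry_delete_or_get(). A summary of the refcount states this diff establishes (an editorial reading, not text from the commit):

	/*
	 * e_refcnt == 0  entry is being torn down; lookups' inc_not_zero fails
	 * e_refcnt == 1  only the hash table holds the entry; the shrinker may
	 *                claim it with cmpxchg(1, 0) and free it
	 * e_refcnt == 2  hash table plus exactly one user; a deleter may claim
	 *                it with cmpxchg(2, 0), and wait_unused() returns
	 * e_refcnt >  2  multiple users; deleters must wait for them to drop off
	 */
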
@@ -399,11 +450,6 @@
 	 * point.
 	 */
 	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		} else
-			WARN_ON(1);
 		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb_cache_entry_put(cache, entry);