2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/mbcache.c
@@ -11,7 +11,7 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -90,12 +90,19 @@
 		return -ENOMEM;

 	INIT_LIST_HEAD(&entry->e_list);
-	/* One ref for hash, one ref returned */
-	atomic_set(&entry->e_refcnt, 1);
+	/*
+	 * We create entry with two references. One reference is kept by the
+	 * hash table, the other reference is used to protect us from
+	 * mb_cache_entry_delete_or_get() until the entry is fully set up.
+	 * This avoids nesting of cache->c_list_lock into hash table bit locks
+	 * which is problematic for RT.
+	 */
+	atomic_set(&entry->e_refcnt, 2);
 	entry->e_key = key;
 	entry->e_value = value;
-	entry->e_reusable = reusable;
-	entry->e_referenced = 0;
+	entry->e_flags = 0;
+	if (reusable)
+		set_bit(MBE_REUSABLE_B, &entry->e_flags);
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
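
Note: this hunk changes the reference-counting discipline at creation time. The entry is now born with two references, one owned by the hash table and one held by the creator, so that a concurrent mb_cache_entry_delete_or_get() cannot tear the entry down while it is still being linked into the LRU list; the creator's reference is dropped at the end of mb_cache_entry_create() (see the mb_cache_entry_put() call in the next hunk). A minimal user-space model of the creation step, as a sketch using C11 atomics with purely illustrative names:

#include <stdatomic.h>

struct entry {
	atomic_int refcnt;		/* models entry->e_refcnt */
};

/* One reference for the hash table, one temporary reference that keeps
 * delete_or_get() at bay until the entry is fully linked. */
static void entry_create(struct entry *e)
{
	atomic_init(&e->refcnt, 2);
	/* ... link into the hash table, then into the LRU list ... */
	atomic_fetch_sub(&e->refcnt, 1);	/* drop the creation reference */
}
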
@@ -107,23 +114,40 @@
 	}
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);
-
 	spin_lock(&cache->c_list_lock);
 	list_add_tail(&entry->e_list, &cache->c_list);
-	/* Grab ref for LRU list */
-	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
 	spin_unlock(&cache->c_list_lock);
+	mb_cache_entry_put(cache, entry);

 	return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);

-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+	struct hlist_bl_head *head;
+
+	head = mb_cache_entry_head(cache, entry->e_key);
+	hlist_bl_lock(head);
+	hlist_bl_del(&entry->e_hash_list);
+	hlist_bl_unlock(head);
 	kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);

+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);

 static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 					   struct mb_cache_entry *entry,
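
Note: wait_var_event() above blocks until the predicate holds and relies on a matching wake_up_var() being issued whenever e_refcnt drops to 2 or below. That wakeup is not part of this file; it presumably lives in the mb_cache_entry_put() helper in include/linux/mbcache.h, along these lines (a reconstruction under that assumption, not part of this diff):

static inline void mb_cache_entry_put(struct mb_cache *cache,
				      struct mb_cache_entry *entry)
{
	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);

	if (cnt > 0) {
		/* wake a possible mb_cache_entry_wait_unused() waiter */
		if (cnt <= 2)
			wake_up_var(&entry->e_refcnt);
		return;
	}
	__mb_cache_entry_free(cache, entry);
}

The threshold of 2 in the wait condition means "the waiter's reference plus the hash table's reference": once only those two remain, the waiter is the last user.
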
@@ -142,10 +166,10 @@
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key &&
+		    test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 		node = node->next;
 	}
 	entry = NULL;
@@ -205,10 +229,9 @@
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_value == value) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key && entry->e_value == value &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 	}
 	entry = NULL;
 out:
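
Note: both lookup paths (__entry_find() above and mb_cache_entry_get() here) switch from an unconditional atomic_inc() to atomic_inc_not_zero(). An entry whose refcount has been raced down to zero by the shrinker or by mb_cache_entry_delete_or_get() is already being freed, so a lookup must not resurrect it. An illustrative user-space equivalent of the inc-not-zero idiom, assuming C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the counter has not already hit zero. */
static bool refcnt_inc_not_zero(atomic_int *refcnt)
{
	int old = atomic_load(refcnt);

	while (old > 0)
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;
	return false;			/* entry is already dying */
}
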
@@ -217,7 +240,7 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_get);

-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
 * @cache - cache we work with
 * @key - key
 * @value - value
@@ -254,6 +277,43 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_delete);

+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs a reference to the entry that we failed to delete because it
+ * still has users, and returns it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+						    u32 key, u64 value)
+{
+	struct mb_cache_entry *entry;
+
+	entry = mb_cache_entry_get(cache, key, value);
+	if (!entry)
+		return NULL;
+
+	/*
+	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
+	 * ref if we are the last user
+	 */
+	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+		return entry;
+
+	spin_lock(&cache->c_list_lock);
+	if (!list_empty(&entry->e_list))
+		list_del_init(&entry->e_list);
+	cache->c_entry_count--;
+	spin_unlock(&cache->c_list_lock);
+	__mb_cache_entry_free(cache, entry);
+	return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
 /* mb_cache_entry_touch - cache entry got used
 * @cache - cache the entry belongs to
 * @entry - entry that got used
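
Note: the intended caller pattern combines the two new functions: try to delete, and if the entry is busy, wait until all other users are gone before proceeding. A sketch of that pattern (retire_entry() is a hypothetical wrapper; the real callers would sit in filesystem code such as the ext4 xattr paths):

/* Retire a cache entry that may still have users. */
static void retire_entry(struct mb_cache *cache, u32 key, u64 value)
{
	struct mb_cache_entry *oe;

	oe = mb_cache_entry_delete_or_get(cache, key, value);
	if (oe) {
		/* Busy: wait until we and the hash table hold the only
		 * references, then drop the one delete_or_get() gave us. */
		mb_cache_entry_wait_unused(oe);
		mb_cache_entry_put(cache, oe);
	}
}
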
@@ -263,7 +323,7 @@
 void mb_cache_entry_touch(struct mb_cache *cache,
 			  struct mb_cache_entry *entry)
 {
-	entry->e_referenced = 1;
+	set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);

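
Note: e_referenced and e_reusable were previously plain bitfields; they become atomic bit flags (set_bit()/test_bit()/clear_bit() on e_flags) because they are now updated without a shared lock. The bit definitions are not in this file; the header presumably carries something like the following (an assumed reconstruction):

/* in include/linux/mbcache.h (assumed) */
enum {
	MBE_REFERENCED_B = 0,
	MBE_REUSABLE_B
};

with an "unsigned long e_flags;" member replacing the two bitfields in struct mb_cache_entry.
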
@@ -281,34 +341,24 @@
 					 unsigned long nr_to_scan)
 {
 	struct mb_cache_entry *entry;
-	struct hlist_bl_head *head;
 	unsigned long shrunk = 0;

 	spin_lock(&cache->c_list_lock);
 	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
-		if (entry->e_referenced) {
-			entry->e_referenced = 0;
+		/* Drop initial hash reference if there is no user */
+		if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+			clear_bit(MBE_REFERENCED_B, &entry->e_flags);
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
 		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
-		/*
-		 * We keep LRU list reference so that entry doesn't go away
-		 * from under us.
-		 */
 		spin_unlock(&cache->c_list_lock);
-		head = mb_cache_entry_head(cache, entry->e_key);
-		hlist_bl_lock(head);
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		}
-		hlist_bl_unlock(head);
-		if (mb_cache_entry_put(cache, entry))
-			shrunk++;
+		__mb_cache_entry_free(cache, entry);
+		shrunk++;
 		cond_resched();
 		spin_lock(&cache->c_list_lock);
 	}
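
Note: the shrinker no longer takes the hash-bucket bit lock while holding c_list_lock. Instead it claims the entry with atomic_cmpxchg(&entry->e_refcnt, 1, 0): reclaim succeeds only when the hash table's reference is the last one, and a successful 1 -> 0 transition transfers ownership to the shrinker, which can then unhash and free the entry outside the list lock. An illustrative user-space equivalent of that claim, assuming C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Claim the entry iff the hash-table reference (count == 1) is the
 * only one left; on success the caller owns the entry exclusively. */
static bool refcnt_claim_last(atomic_int *refcnt)
{
	int expected = 1;

	return atomic_compare_exchange_strong(refcnt, &expected, 0);
}
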
@@ -400,11 +450,6 @@
 	 * point.
 	 */
 	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		} else
-			WARN_ON(1);
 		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb_cache_entry_put(cache, entry);