@@ -11,7 +11,7 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -90,12 +90,19 @@
                 return -ENOMEM;
 
         INIT_LIST_HEAD(&entry->e_list);
-        /* One ref for hash, one ref returned */
-        atomic_set(&entry->e_refcnt, 1);
+        /*
+         * We create entry with two references. One reference is kept by the
+         * hash table, the other reference is used to protect us from
+         * mb_cache_entry_delete_or_get() until the entry is fully setup. This
+         * avoids nesting of cache->c_list_lock into hash table bit locks which
+         * is problematic for RT.
+         */
+        atomic_set(&entry->e_refcnt, 2);
         entry->e_key = key;
         entry->e_value = value;
-        entry->e_reusable = reusable;
-        entry->e_referenced = 0;
+        entry->e_flags = 0;
+        if (reusable)
+                set_bit(MBE_REUSABLE_B, &entry->e_flags);
         head = mb_cache_entry_head(cache, key);
         hlist_bl_lock(head);
         hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
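The MBE_REUSABLE_B bit and the e_flags word it lives in come from the header side of this change, which is not shown in this excerpt; replacing the old e_reusable/e_referenced bitfields with atomic bitops lets the two bits be flipped without non-atomic read-modify-write on a shared word. A plausible shape for that companion mbcache.h change, as a sketch rather than the verbatim header diff (field order and comments are assumptions; only the names used by this file are certain):

```c
/* Sketch of the mbcache.h side assumed by this file. */
struct mb_cache_entry {
	/* List of entries in cache handled by cache->c_list_lock */
	struct list_head	e_list;
	/* Hash table list - protected by the hash chain bitlock */
	struct hlist_bl_node	e_hash_list;
	/* Entry refcnt; once it reaches zero the entry is unhashed and freed */
	atomic_t		e_refcnt;
	/* Key in hash - stable during the lifetime of the entry */
	u32			e_key;
	/* Replaces the old e_reusable / e_referenced bitfields */
	unsigned long		e_flags;
	/* User-provided value - stable during the lifetime of the entry */
	u64			e_value;
};

/* Bit numbers used with set_bit()/test_bit()/clear_bit() on e_flags */
enum {
	MBE_REFERENCED_B = 0,
	MBE_REUSABLE_B
};
```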
@@ -107,23 +114,40 @@
         }
         hlist_bl_add_head(&entry->e_hash_list, head);
         hlist_bl_unlock(head);
-
         spin_lock(&cache->c_list_lock);
         list_add_tail(&entry->e_list, &cache->c_list);
-        /* Grab ref for LRU list */
-        atomic_inc(&entry->e_refcnt);
         cache->c_entry_count++;
         spin_unlock(&cache->c_list_lock);
+        mb_cache_entry_put(cache, entry);
 
         return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);
 
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+        struct hlist_bl_head *head;
+
+        head = mb_cache_entry_head(cache, entry->e_key);
+        hlist_bl_lock(head);
+        hlist_bl_del(&entry->e_hash_list);
+        hlist_bl_unlock(head);
         kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);
+
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+        wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
 
 static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
                                            struct mb_cache_entry *entry,
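mb_cache_entry_wait_unused() sleeps until the caller is the last user, so some path has to wake the waiters as references are dropped. That counterpart is the inline mb_cache_entry_put() in mbcache.h, which this excerpt does not show. A sketch of what the updated helper presumably looks like after this change (the exact wake-up threshold is an assumption, chosen to pair with the "<= 2" wait condition above):

```c
/*
 * Sketch of the header-side counterpart (not shown in this diff).
 * Dropping the last reference frees the entry through
 * __mb_cache_entry_free(), which per this patch also unhashes it;
 * dropping to a low count wakes mb_cache_entry_wait_unused() waiters.
 */
static inline void mb_cache_entry_put(struct mb_cache *cache,
				      struct mb_cache_entry *entry)
{
	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);

	if (cnt > 0) {
		if (cnt <= 2)	/* assumed threshold, paired with the wait above */
			wake_up_var(&entry->e_refcnt);
		return;
	}
	__mb_cache_entry_free(cache, entry);
}
```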
@@ -142,10 +166,10 @@
         while (node) {
                 entry = hlist_bl_entry(node, struct mb_cache_entry,
                                        e_hash_list);
-                if (entry->e_key == key && entry->e_reusable) {
-                        atomic_inc(&entry->e_refcnt);
+                if (entry->e_key == key &&
+                    test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+                    atomic_inc_not_zero(&entry->e_refcnt))
                         goto out;
-                }
                 node = node->next;
         }
         entry = NULL;
@@ -205,10 +229,9 @@
         head = mb_cache_entry_head(cache, key);
         hlist_bl_lock(head);
         hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-                if (entry->e_key == key && entry->e_value == value) {
-                        atomic_inc(&entry->e_refcnt);
+                if (entry->e_key == key && entry->e_value == value &&
+                    atomic_inc_not_zero(&entry->e_refcnt))
                         goto out;
-                }
         }
         entry = NULL;
 out:
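Both lookup paths above (__entry_find() and mb_cache_entry_get()) switch from atomic_inc() to atomic_inc_not_zero(). With this scheme the shrinker and mb_cache_entry_delete_or_get() can drive the refcount of an unused entry to zero while the entry is still visible on its hash chain (it is only unhashed afterwards, in __mb_cache_entry_free()), so a lookup may take a reference only if the count has not already hit zero. A minimal illustration of the idiom; the helper name is invented for this sketch and does not exist in the patch:

```c
#include <linux/atomic.h>
#include <linux/mbcache.h>

/*
 * Illustrative helper, not part of the patch: grab a reference to an
 * entry found on a hash chain only if it is still live. Returning
 * false means a concurrent shrink or delete already claimed the last
 * reference; the entry must be skipped and will be unhashed and freed
 * by whoever took the count to zero.
 */
static inline bool mbe_try_get(struct mb_cache_entry *entry)
{
	return atomic_inc_not_zero(&entry->e_refcnt);
}
```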
@@ -217,7 +240,7 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
  * @cache - cache we work with
  * @key - key
  * @value - value
@@ -254,6 +277,43 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_delete);
 
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs a reference to the entry that we failed to delete because it
+ * still has users and returns it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+                                                    u32 key, u64 value)
+{
+        struct mb_cache_entry *entry;
+
+        entry = mb_cache_entry_get(cache, key, value);
+        if (!entry)
+                return NULL;
+
+        /*
+         * Drop the ref we got from mb_cache_entry_get() and the initial hash
+         * ref if we are the last user
+         */
+        if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+                return entry;
+
+        spin_lock(&cache->c_list_lock);
+        if (!list_empty(&entry->e_list))
+                list_del_init(&entry->e_list);
+        cache->c_entry_count--;
+        spin_unlock(&cache->c_list_lock);
+        __mb_cache_entry_free(cache, entry);
+        return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
 /* mb_cache_entry_touch - cache entry got used
  * @cache - cache the entry belongs to
  * @entry - entry that got used
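The kernel-doc added for mb_cache_entry_delete_or_get() describes its contract: NULL means the entry is gone (or never existed), while a non-NULL return hands the caller a reference to an entry that still has users. Together with mb_cache_entry_wait_unused() this lets a caller make sure nobody can still reach a cached block before reusing it on disk. A hypothetical caller sketch, not taken from the patch; the function and parameter names are illustrative only:

```c
#include <linux/mbcache.h>

/* Illustrative only: wait until a block is no longer reachable via the cache. */
static void example_release_block(struct mb_cache *cache, u32 hash, u64 blocknr)
{
	struct mb_cache_entry *entry;

	while ((entry = mb_cache_entry_delete_or_get(cache, hash, blocknr))) {
		/*
		 * Somebody still holds a reference (e.g. a lookup about to
		 * reuse the block). Wait until we are the last user, drop
		 * the reference we were handed back, and retry the delete.
		 */
		mb_cache_entry_wait_unused(entry);
		mb_cache_entry_put(cache, entry);
	}
	/* The entry is gone; the block can no longer be found and reused. */
}
```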
@@ -263,7 +323,7 @@
 void mb_cache_entry_touch(struct mb_cache *cache,
                           struct mb_cache_entry *entry)
 {
-        entry->e_referenced = 1;
+        set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
 
@@ -281,34 +341,24 @@
                                      unsigned long nr_to_scan)
 {
         struct mb_cache_entry *entry;
-        struct hlist_bl_head *head;
         unsigned long shrunk = 0;
 
         spin_lock(&cache->c_list_lock);
         while (nr_to_scan-- && !list_empty(&cache->c_list)) {
                 entry = list_first_entry(&cache->c_list,
                                          struct mb_cache_entry, e_list);
-                if (entry->e_referenced) {
-                        entry->e_referenced = 0;
+                /* Drop initial hash reference if there is no user */
+                if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+                    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+                        clear_bit(MBE_REFERENCED_B, &entry->e_flags);
                         list_move_tail(&entry->e_list, &cache->c_list);
                         continue;
                 }
                 list_del_init(&entry->e_list);
                 cache->c_entry_count--;
-                /*
-                 * We keep LRU list reference so that entry doesn't go away
-                 * from under us.
-                 */
                 spin_unlock(&cache->c_list_lock);
-                head = mb_cache_entry_head(cache, entry->e_key);
-                hlist_bl_lock(head);
-                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-                        hlist_bl_del_init(&entry->e_hash_list);
-                        atomic_dec(&entry->e_refcnt);
-                }
-                hlist_bl_unlock(head);
-                if (mb_cache_entry_put(cache, entry))
-                        shrunk++;
+                __mb_cache_entry_free(cache, entry);
+                shrunk++;
                 cond_resched();
                 spin_lock(&cache->c_list_lock);
         }
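In the shrinker, atomic_cmpxchg(&entry->e_refcnt, 1, 0) atomically claims the single remaining hash-table reference: it succeeds only if no creator, lookup, or mb_cache_entry_delete_or_get() caller holds an extra reference at that instant, and once it succeeds no new reference can appear because the lookups above use atomic_inc_not_zero(). The same "claim the last reference" idiom shown in isolation, with a helper name invented for illustration:

```c
#include <linux/atomic.h>
#include <linux/mbcache.h>

/*
 * Illustrative helper, not part of the patch: atomically take ownership
 * of an entry whose only remaining reference is the hash table's.
 * Returns true if the caller may free the entry outright; false means
 * someone still uses it, so the shrinker leaves it alone and merely
 * rotates it to the tail of the LRU list.
 */
static inline bool mbe_claim_last_ref(struct mb_cache_entry *entry)
{
	return atomic_cmpxchg(&entry->e_refcnt, 1, 0) == 1;
}
```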
@@ -400,11 +450,6 @@
          * point.
          */
         list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-                        hlist_bl_del_init(&entry->e_hash_list);
-                        atomic_dec(&entry->e_refcnt);
-                } else
-                        WARN_ON(1);
                 list_del(&entry->e_list);
                 WARN_ON(atomic_read(&entry->e_refcnt) != 1);
                 mb_cache_entry_put(cache, entry);
---|