```diff
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/list.h>
@@ -10,7 +11,7 @@
 /*
  * Mbcache is a simple key-value store. Keys need not be unique, however
  * key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
  *
  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
  * Ext4 also uses it for deduplication of xattr values stored in inodes.
```
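For orientation before the refcounting changes below: a filesystem publishes candidate blocks into the cache and scans entries sharing a key for reusable duplicates. A minimal caller sketch, assuming the existing mb_cache API (mb_cache_entry_create(), mb_cache_entry_find_first()/mb_cache_entry_find_next(), mb_cache_entry_put()); dedup_xattr_block() and blocks_match() are hypothetical stand-ins for the filesystem's own logic:

```c
#include <linux/mbcache.h>

/* Sketch only: dedup an xattr block by content hash (not code from this patch). */
static u64 dedup_xattr_block(struct mb_cache *cache, u32 hash, u64 blocknr)
{
	struct mb_cache_entry *ce;

	/* Scan entries with the same key; keys need not be unique. */
	ce = mb_cache_entry_find_first(cache, hash);
	while (ce) {
		u64 candidate = ce->e_value;

		if (blocks_match(candidate, blocknr)) {	/* hypothetical helper */
			mb_cache_entry_put(cache, ce);
			return candidate;	/* share the existing block */
		}
		/* find_next() drops the reference on the entry passed in. */
		ce = mb_cache_entry_find_next(cache, ce);
	}
	/* No duplicate: publish our block for future callers to reuse. */
	mb_cache_entry_create(cache, GFP_NOFS, hash, blocknr, true /* reusable */);
	return blocknr;
}
```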
```diff
@@ -89,12 +90,19 @@
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&entry->e_list);
-	/* One ref for hash, one ref returned */
-	atomic_set(&entry->e_refcnt, 1);
+	/*
+	 * We create entry with two references. One reference is kept by the
+	 * hash table, the other reference is used to protect us from
+	 * mb_cache_entry_delete_or_get() until the entry is fully setup. This
+	 * avoids nesting of cache->c_list_lock into hash table bit locks which
+	 * is problematic for RT.
+	 */
+	atomic_set(&entry->e_refcnt, 2);
 	entry->e_key = key;
 	entry->e_value = value;
-	entry->e_reusable = reusable;
-	entry->e_referenced = 0;
+	entry->e_flags = 0;
+	if (reusable)
+		set_bit(MBE_REUSABLE_B, &entry->e_flags);
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
```
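This hunk also replaces the old e_reusable/e_referenced bitfields with a single e_flags word driven by atomic bit operations: adjacent C bitfields cannot safely be updated from concurrent contexts without a lock, while set_bit()/test_bit()/clear_bit() on distinct bits of one unsigned long can. The flag bits are presumably declared along these lines in include/linux/mbcache.h (names taken from the diff; the exact declaration is an assumption, as the header change is not part of this hunk):

```c
/* Assumed companion change in include/linux/mbcache.h. */
enum {
	MBE_REFERENCED_B = 0,	/* recently used: gets a second pass in the LRU scan */
	MBE_REUSABLE_B		/* may be returned by mb_cache_entry_find_first/next() */
};

struct mb_cache_entry {
	struct list_head	e_list;		/* LRU list, protected by c_list_lock */
	struct hlist_bl_node	e_hash_list;	/* hash chain, protected by chain bit lock */
	atomic_t		e_refcnt;	/* entry is dead once this reaches zero */
	u32			e_key;
	unsigned long		e_flags;	/* MBE_* bits, atomic bit ops only */
	u64			e_value;
};
```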
```diff
@@ -106,23 +114,40 @@
 	}
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);
-
 	spin_lock(&cache->c_list_lock);
 	list_add_tail(&entry->e_list, &cache->c_list);
-	/* Grab ref for LRU list */
-	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
 	spin_unlock(&cache->c_list_lock);
+	mb_cache_entry_put(cache, entry);
 
 	return 0;
 }
 EXPORT_SYMBOL(mb_cache_entry_create);
 
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
 {
+	struct hlist_bl_head *head;
+
+	head = mb_cache_entry_head(cache, entry->e_key);
+	hlist_bl_lock(head);
+	hlist_bl_del(&entry->e_hash_list);
+	hlist_bl_unlock(head);
 	kmem_cache_free(mb_entry_cache, entry);
 }
 EXPORT_SYMBOL(__mb_cache_entry_free);
+
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
 
 static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 					   struct mb_cache_entry *entry,
```
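mb_cache_entry_wait_unused() only works if every reference drop that can reach the "last user" threshold wakes the waiter. That counterpart lives in the header, outside this diff; based on how wait_var_event()/wake_up_var() pair, the reworked mb_cache_entry_put() is expected to look roughly like this (a sketch, not a hunk from this patch):

```c
/* Sketch of the matching mb_cache_entry_put() in include/linux/mbcache.h. */
static inline void mb_cache_entry_put(struct mb_cache *cache,
				      struct mb_cache_entry *entry)
{
	unsigned int cnt = atomic_dec_return(&entry->e_refcnt);

	if (cnt > 0) {
		if (cnt <= 2)
			wake_up_var(&entry->e_refcnt);	/* pairs with wait_var_event() */
		return;
	}
	/* Last reference gone: unhash and free (see __mb_cache_entry_free() above). */
	__mb_cache_entry_free(cache, entry);
}
```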
```diff
@@ -141,10 +166,10 @@
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key &&
+		    test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 		node = node->next;
 	}
 	entry = NULL;
```
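The lookup change swaps an unconditional atomic_inc() for atomic_inc_not_zero(): a hash-chain walker may take a reference only while the count is still non-zero, so an entry whose count has already been raced to zero by the shrinker or by mb_cache_entry_delete_or_get() is simply skipped. The idiom in isolation (mb_cache_entry_tryget() is a hypothetical name, used here only to label the pattern):

```c
/*
 * Try to take a reference on @entry while walking its hash chain.
 * Returns false if the entry is already logically dead (e_refcnt == 0).
 * The chain bit lock held by the caller keeps the memory valid either
 * way, because __mb_cache_entry_free() unhashes under that same lock
 * before freeing.
 */
static bool mb_cache_entry_tryget(struct mb_cache_entry *entry)
{
	return atomic_inc_not_zero(&entry->e_refcnt);
}
```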
```diff
@@ -204,10 +229,9 @@
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
-		if (entry->e_key == key && entry->e_value == value) {
-			atomic_inc(&entry->e_refcnt);
+		if (entry->e_key == key && entry->e_value == value &&
+		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
-		}
 	}
 	entry = NULL;
 out:
@@ -216,7 +240,7 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_get);
 
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
 * @cache - cache we work with
 * @key - key
 * @value - value
```
```diff
@@ -253,6 +277,43 @@
 }
 EXPORT_SYMBOL(mb_cache_entry_delete);
 
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs reference of the entry that we failed to delete because it
+ * still has users and return it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+						    u32 key, u64 value)
+{
+	struct mb_cache_entry *entry;
+
+	entry = mb_cache_entry_get(cache, key, value);
+	if (!entry)
+		return NULL;
+
+	/*
+	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
+	 * ref if we are the last user
+	 */
+	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+		return entry;
+
+	spin_lock(&cache->c_list_lock);
+	if (!list_empty(&entry->e_list))
+		list_del_init(&entry->e_list);
+	cache->c_entry_count--;
+	spin_unlock(&cache->c_list_lock);
+	__mb_cache_entry_free(cache, entry);
+	return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
 /* mb_cache_entry_touch - cache entry got used
 * @cache - cache the entry belongs to
 * @entry - entry that got used
```
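The new return convention exists for callers that must not recycle a disk block while another user still holds the cached entry for it. A sketch of the intended caller pattern, modeled on how ext4's xattr code is expected to drive this API (ea_block_cache, hash and blocknr are placeholder names, not code from this patch):

```c
struct mb_cache_entry *oe;

oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, blocknr);
if (oe) {
	/*
	 * The entry still has users: wait for them to finish, then drop
	 * the reference delete_or_get() handed back to us. The last real
	 * user's mb_cache_entry_put() is what wakes the waiter.
	 */
	mb_cache_entry_wait_unused(oe);
	mb_cache_entry_put(ea_block_cache, oe);
}
/* No new references can appear for the old pair; the block can be reused. */
```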
```diff
@@ -262,6 +323,6 @@
 void mb_cache_entry_touch(struct mb_cache *cache,
 			  struct mb_cache_entry *entry)
 {
-	entry->e_referenced = 1;
+	set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
@@ -280,34 +341,24 @@
 				     unsigned long nr_to_scan)
 {
 	struct mb_cache_entry *entry;
-	struct hlist_bl_head *head;
 	unsigned long shrunk = 0;
 
 	spin_lock(&cache->c_list_lock);
 	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
-		if (entry->e_referenced) {
-			entry->e_referenced = 0;
+		/* Drop initial hash reference if there is no user */
+		if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+			clear_bit(MBE_REFERENCED_B, &entry->e_flags);
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
 		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
-		/*
-		 * We keep LRU list reference so that entry doesn't go away
-		 * from under us.
-		 */
 		spin_unlock(&cache->c_list_lock);
-		head = mb_cache_entry_head(cache, entry->e_key);
-		hlist_bl_lock(head);
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		}
-		hlist_bl_unlock(head);
-		if (mb_cache_entry_put(cache, entry))
-			shrunk++;
+		__mb_cache_entry_free(cache, entry);
+		shrunk++;
 		cond_resched();
 		spin_lock(&cache->c_list_lock);
 	}
```
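Taken together, the shrinker's atomic_cmpxchg(&e_refcnt, 1, 0) and the earlier cmpxchg(2, 0) in mb_cache_entry_delete_or_get() define a small state machine over the reference count. A summary derived from the hunks above (this annotation is not text from the patch itself):

```c
/*
 * e_refcnt states under the reworked scheme:
 *
 *   0      logically dead: __mb_cache_entry_free() is (about to be) called;
 *          hash walkers see atomic_inc_not_zero() fail and skip the entry
 *   1      idle: only the initial hash/LRU reference remains, so the
 *          shrinker may claim and free it via atomic_cmpxchg(1 -> 0)
 *   2      the hash reference plus exactly one taken reference; this is
 *          what mb_cache_entry_delete_or_get() holds when its
 *          cmpxchg(2 -> 0) deletion succeeds
 *   > 2    other users still active; mb_cache_entry_wait_unused() sleeps
 *          until the count falls back to 2
 */
```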
```diff
@@ -399,11 +450,6 @@
 	 * point.
 	 */
 	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
-		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
-			hlist_bl_del_init(&entry->e_hash_list);
-			atomic_dec(&entry->e_refcnt);
-		} else
-			WARN_ON(1);
 		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb_cache_entry_put(cache, entry);
```
|---|