| .. | .. |
|---|
| 9 | 9 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
|---|
| 10 | 10 | */ |
|---|
| 11 | 11 | |
|---|
| 12 | +#include <linux/sunrpc/svc_xprt.h> |
|---|
| 12 | 13 | #include <linux/slab.h> |
|---|
| 13 | 14 | #include <linux/vmalloc.h> |
|---|
| 14 | 15 | #include <linux/sunrpc/addr.h> |
|---|
| .. | .. |
|---|
| 19 | 20 | |
|---|
| 20 | 21 | #include "nfsd.h" |
|---|
| 21 | 22 | #include "cache.h" |
|---|
| 22 | | - |
|---|
| 23 | | -#define NFSDDBG_FACILITY NFSDDBG_REPCACHE |
|---|
| 23 | +#include "trace.h" |
|---|
| 24 | 24 | |
|---|
| 25 | 25 | /* |
|---|
| 26 | 26 | * We use this value to determine the number of hash buckets from the max |
|---|
| .. | .. |
|---|
| 30 | 30 | #define TARGET_BUCKET_SIZE 64 |
|---|
| 31 | 31 | |
|---|
| 32 | 32 | struct nfsd_drc_bucket { |
|---|
| 33 | + struct rb_root rb_head; |
|---|
| 33 | 34 | struct list_head lru_head; |
|---|
| 34 | 35 | spinlock_t cache_lock; |
|---|
| 35 | 36 | }; |
|---|
| 36 | 37 | |
|---|
| 37 | | -static struct nfsd_drc_bucket *drc_hashtbl; |
|---|
| 38 | 38 | static struct kmem_cache *drc_slab; |
|---|
| 39 | | - |
|---|
| 40 | | -/* max number of entries allowed in the cache */ |
|---|
| 41 | | -static unsigned int max_drc_entries; |
|---|
| 42 | | - |
|---|
| 43 | | -/* number of significant bits in the hash value */ |
|---|
| 44 | | -static unsigned int maskbits; |
|---|
| 45 | | -static unsigned int drc_hashsize; |
|---|
| 46 | | - |
|---|
| 47 | | -/* |
|---|
| 48 | | - * Stats and other tracking of on the duplicate reply cache. All of these and |
|---|
| 49 | | - * the "rc" fields in nfsdstats are protected by the cache_lock |
|---|
| 50 | | - */ |
|---|
| 51 | | - |
|---|
| 52 | | -/* total number of entries */ |
|---|
| 53 | | -static atomic_t num_drc_entries; |
|---|
| 54 | | - |
|---|
| 55 | | -/* cache misses due only to checksum comparison failures */ |
|---|
| 56 | | -static unsigned int payload_misses; |
|---|
| 57 | | - |
|---|
| 58 | | -/* amount of memory (in bytes) currently consumed by the DRC */ |
|---|
| 59 | | -static unsigned int drc_mem_usage; |
|---|
| 60 | | - |
|---|
| 61 | | -/* longest hash chain seen */ |
|---|
| 62 | | -static unsigned int longest_chain; |
|---|
| 63 | | - |
|---|
| 64 | | -/* size of cache when we saw the longest hash chain */ |
|---|
| 65 | | -static unsigned int longest_chain_cachesize; |
|---|
| 66 | 39 | |
|---|
| 67 | 40 | static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); |
|---|
| 68 | 41 | static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, |
|---|
| 69 | 42 | struct shrink_control *sc); |
|---|
| 70 | 43 | static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, |
|---|
| 71 | 44 | struct shrink_control *sc); |
|---|
| 72 | | - |
|---|
| 73 | | -static struct shrinker nfsd_reply_cache_shrinker = { |
|---|
| 74 | | - .scan_objects = nfsd_reply_cache_scan, |
|---|
| 75 | | - .count_objects = nfsd_reply_cache_count, |
|---|
| 76 | | - .seeks = 1, |
|---|
| 77 | | -}; |
|---|
| 78 | 45 | |
|---|
| 79 | 46 | /* |
|---|
| 80 | 47 | * Put a cap on the size of the DRC based on the amount of available |
|---|
| .. | .. |
|---|
| 93 | 60 | * ...with a hard cap of 256k entries. In the worst case, each entry will be |
|---|
| 94 | 61 | * ~1k, so the above numbers should give a rough max of the amount of memory |
|---|
| 95 | 62 | * used in k. |
|---|
| 63 | + * |
|---|
| 64 | + * XXX: these limits are per-container, so memory used will increase |
|---|
| 65 | + * linearly with number of containers. Maybe that's OK. |
|---|
| 96 | 66 | */ |
|---|
| 97 | 67 | static unsigned int |
|---|
| 98 | 68 | nfsd_cache_size_limit(void) |
|---|
| 99 | 69 | { |
|---|
| 100 | 70 | unsigned int limit; |
|---|
| 101 | | - unsigned long low_pages = totalram_pages - totalhigh_pages; |
|---|
| 71 | + unsigned long low_pages = totalram_pages() - totalhigh_pages(); |
|---|
| 102 | 72 | |
|---|
| 103 | 73 | limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); |
|---|
| 104 | 74 | return min_t(unsigned int, limit, 256*1024); |
|---|
| .. | .. |
|---|
| 115 | 85 | } |
|---|
| 116 | 86 | |
|---|
| 117 | 87 | static u32 |
|---|
| 118 | | -nfsd_cache_hash(__be32 xid) |
|---|
| 88 | +nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) |
|---|
| 119 | 89 | { |
|---|
| 120 | | - return hash_32(be32_to_cpu(xid), maskbits); |
|---|
| 90 | + return hash_32(be32_to_cpu(xid), nn->maskbits); |
|---|
| 121 | 91 | } |
|---|
| 122 | 92 | |
|---|
| 123 | 93 | static struct svc_cacherep * |
|---|
| 124 | | -nfsd_reply_cache_alloc(void) |
|---|
| 94 | +nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, |
|---|
| 95 | + struct nfsd_net *nn) |
|---|
| 125 | 96 | { |
|---|
| 126 | 97 | struct svc_cacherep *rp; |
|---|
| 127 | 98 | |
|---|
| .. | .. |
|---|
| 129 | 100 | if (rp) { |
|---|
| 130 | 101 | rp->c_state = RC_UNUSED; |
|---|
| 131 | 102 | rp->c_type = RC_NOCACHE; |
|---|
| 103 | + RB_CLEAR_NODE(&rp->c_node); |
|---|
| 132 | 104 | INIT_LIST_HEAD(&rp->c_lru); |
|---|
| 105 | + |
|---|
| 106 | + memset(&rp->c_key, 0, sizeof(rp->c_key)); |
|---|
| 107 | + rp->c_key.k_xid = rqstp->rq_xid; |
|---|
| 108 | + rp->c_key.k_proc = rqstp->rq_proc; |
|---|
| 109 | + rpc_copy_addr((struct sockaddr *)&rp->c_key.k_addr, svc_addr(rqstp)); |
|---|
| 110 | + rpc_set_port((struct sockaddr *)&rp->c_key.k_addr, rpc_get_port(svc_addr(rqstp))); |
|---|
| 111 | + rp->c_key.k_prot = rqstp->rq_prot; |
|---|
| 112 | + rp->c_key.k_vers = rqstp->rq_vers; |
|---|
| 113 | + rp->c_key.k_len = rqstp->rq_arg.len; |
|---|
| 114 | + rp->c_key.k_csum = csum; |
|---|
| 133 | 115 | } |
|---|
| 134 | 116 | return rp; |
|---|
| 135 | 117 | } |
|---|
| 136 | 118 | |
|---|
| 137 | 119 | static void |
|---|
| 138 | | -nfsd_reply_cache_free_locked(struct svc_cacherep *rp) |
|---|
| 120 | +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, |
|---|
| 121 | + struct nfsd_net *nn) |
|---|
| 139 | 122 | { |
|---|
| 140 | 123 | if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { |
|---|
| 141 | | - drc_mem_usage -= rp->c_replvec.iov_len; |
|---|
| 124 | + nn->drc_mem_usage -= rp->c_replvec.iov_len; |
|---|
| 142 | 125 | kfree(rp->c_replvec.iov_base); |
|---|
| 143 | 126 | } |
|---|
| 144 | | - list_del(&rp->c_lru); |
|---|
| 145 | | - atomic_dec(&num_drc_entries); |
|---|
| 146 | | - drc_mem_usage -= sizeof(*rp); |
|---|
| 127 | + if (rp->c_state != RC_UNUSED) { |
|---|
| 128 | + rb_erase(&rp->c_node, &b->rb_head); |
|---|
| 129 | + list_del(&rp->c_lru); |
|---|
| 130 | + atomic_dec(&nn->num_drc_entries); |
|---|
| 131 | + nn->drc_mem_usage -= sizeof(*rp); |
|---|
| 132 | + } |
|---|
| 147 | 133 | kmem_cache_free(drc_slab, rp); |
|---|
| 148 | 134 | } |
|---|
| 149 | 135 | |
|---|
| 150 | 136 | static void |
|---|
| 151 | | -nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) |
|---|
| 137 | +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, |
|---|
| 138 | + struct nfsd_net *nn) |
|---|
| 152 | 139 | { |
|---|
| 153 | 140 | spin_lock(&b->cache_lock); |
|---|
| 154 | | - nfsd_reply_cache_free_locked(rp); |
|---|
| 141 | + nfsd_reply_cache_free_locked(b, rp, nn); |
|---|
| 155 | 142 | spin_unlock(&b->cache_lock); |
|---|
| 156 | 143 | } |
|---|
| 157 | 144 | |
|---|
| 158 | | -int nfsd_reply_cache_init(void) |
|---|
| 145 | +int nfsd_drc_slab_create(void) |
|---|
| 146 | +{ |
|---|
| 147 | + drc_slab = kmem_cache_create("nfsd_drc", |
|---|
| 148 | + sizeof(struct svc_cacherep), 0, 0, NULL); |
|---|
| 149 | + return drc_slab ? 0: -ENOMEM; |
|---|
| 150 | +} |
|---|
| 151 | + |
|---|
| 152 | +void nfsd_drc_slab_free(void) |
|---|
| 153 | +{ |
|---|
| 154 | + kmem_cache_destroy(drc_slab); |
|---|
| 155 | +} |
|---|
| 156 | + |
|---|
| 157 | +int nfsd_reply_cache_init(struct nfsd_net *nn) |
|---|
| 159 | 158 | { |
|---|
| 160 | 159 | unsigned int hashsize; |
|---|
| 161 | 160 | unsigned int i; |
|---|
| 162 | 161 | int status = 0; |
|---|
| 163 | 162 | |
|---|
| 164 | | - max_drc_entries = nfsd_cache_size_limit(); |
|---|
| 165 | | - atomic_set(&num_drc_entries, 0); |
|---|
| 166 | | - hashsize = nfsd_hashsize(max_drc_entries); |
|---|
| 167 | | - maskbits = ilog2(hashsize); |
|---|
| 163 | + nn->max_drc_entries = nfsd_cache_size_limit(); |
|---|
| 164 | + atomic_set(&nn->num_drc_entries, 0); |
|---|
| 165 | + hashsize = nfsd_hashsize(nn->max_drc_entries); |
|---|
| 166 | + nn->maskbits = ilog2(hashsize); |
|---|
| 168 | 167 | |
|---|
| 169 | | - status = register_shrinker(&nfsd_reply_cache_shrinker); |
|---|
| 168 | + nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; |
|---|
| 169 | + nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; |
|---|
| 170 | + nn->nfsd_reply_cache_shrinker.seeks = 1; |
|---|
| 171 | + status = register_shrinker(&nn->nfsd_reply_cache_shrinker); |
|---|
| 170 | 172 | if (status) |
|---|
| 171 | | - return status; |
|---|
| 172 | | - |
|---|
| 173 | | - drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), |
|---|
| 174 | | - 0, 0, NULL); |
|---|
| 175 | | - if (!drc_slab) |
|---|
| 176 | 173 | goto out_nomem; |
|---|
| 177 | 174 | |
|---|
| 178 | | - drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); |
|---|
| 179 | | - if (!drc_hashtbl) { |
|---|
| 180 | | - drc_hashtbl = vzalloc(array_size(hashsize, |
|---|
| 181 | | - sizeof(*drc_hashtbl))); |
|---|
| 182 | | - if (!drc_hashtbl) |
|---|
| 183 | | - goto out_nomem; |
|---|
| 184 | | - } |
|---|
| 175 | + nn->drc_hashtbl = kvzalloc(array_size(hashsize, |
|---|
| 176 | + sizeof(*nn->drc_hashtbl)), GFP_KERNEL); |
|---|
| 177 | + if (!nn->drc_hashtbl) |
|---|
| 178 | + goto out_shrinker; |
|---|
| 185 | 179 | |
|---|
| 186 | 180 | for (i = 0; i < hashsize; i++) { |
|---|
| 187 | | - INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); |
|---|
| 188 | | - spin_lock_init(&drc_hashtbl[i].cache_lock); |
|---|
| 181 | + INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); |
|---|
| 182 | + spin_lock_init(&nn->drc_hashtbl[i].cache_lock); |
|---|
| 189 | 183 | } |
|---|
| 190 | | - drc_hashsize = hashsize; |
|---|
| 184 | + nn->drc_hashsize = hashsize; |
|---|
| 191 | 185 | |
|---|
| 192 | 186 | return 0; |
|---|
| 187 | +out_shrinker: |
|---|
| 188 | + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); |
|---|
| 193 | 189 | out_nomem: |
|---|
| 194 | 190 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); |
|---|
| 195 | | - nfsd_reply_cache_shutdown(); |
|---|
| 196 | 191 | return -ENOMEM; |
|---|
| 197 | 192 | } |
|---|
| 198 | 193 | |
|---|
| 199 | | -void nfsd_reply_cache_shutdown(void) |
|---|
| 194 | +void nfsd_reply_cache_shutdown(struct nfsd_net *nn) |
|---|
| 200 | 195 | { |
|---|
| 201 | 196 | struct svc_cacherep *rp; |
|---|
| 202 | 197 | unsigned int i; |
|---|
| 203 | 198 | |
|---|
| 204 | | - unregister_shrinker(&nfsd_reply_cache_shrinker); |
|---|
| 199 | + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); |
|---|
| 205 | 200 | |
|---|
| 206 | | - for (i = 0; i < drc_hashsize; i++) { |
|---|
| 207 | | - struct list_head *head = &drc_hashtbl[i].lru_head; |
|---|
| 201 | + for (i = 0; i < nn->drc_hashsize; i++) { |
|---|
| 202 | + struct list_head *head = &nn->drc_hashtbl[i].lru_head; |
|---|
| 208 | 203 | while (!list_empty(head)) { |
|---|
| 209 | 204 | rp = list_first_entry(head, struct svc_cacherep, c_lru); |
|---|
| 210 | | - nfsd_reply_cache_free_locked(rp); |
|---|
| 205 | + nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i], |
|---|
| 206 | + rp, nn); |
|---|
| 211 | 207 | } |
|---|
| 212 | 208 | } |
|---|
| 213 | 209 | |
|---|
| 214 | | - kvfree(drc_hashtbl); |
|---|
| 215 | | - drc_hashtbl = NULL; |
|---|
| 216 | | - drc_hashsize = 0; |
|---|
| 210 | + kvfree(nn->drc_hashtbl); |
|---|
| 211 | + nn->drc_hashtbl = NULL; |
|---|
| 212 | + nn->drc_hashsize = 0; |
|---|
| 217 | 213 | |
|---|
| 218 | | - kmem_cache_destroy(drc_slab); |
|---|
| 219 | | - drc_slab = NULL; |
|---|
| 220 | 214 | } |
|---|
| 221 | 215 | |
|---|
| 222 | 216 | /* |
|---|
| .. | .. |
|---|
| 231 | 225 | } |
|---|
| 232 | 226 | |
|---|
| 233 | 227 | static long |
|---|
| 234 | | -prune_bucket(struct nfsd_drc_bucket *b) |
|---|
| 228 | +prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) |
|---|
| 235 | 229 | { |
|---|
| 236 | 230 | struct svc_cacherep *rp, *tmp; |
|---|
| 237 | 231 | long freed = 0; |
|---|
| .. | .. |
|---|
| 243 | 237 | */ |
|---|
| 244 | 238 | if (rp->c_state == RC_INPROG) |
|---|
| 245 | 239 | continue; |
|---|
| 246 | | - if (atomic_read(&num_drc_entries) <= max_drc_entries && |
|---|
| 240 | + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && |
|---|
| 247 | 241 | time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) |
|---|
| 248 | 242 | break; |
|---|
| 249 | | - nfsd_reply_cache_free_locked(rp); |
|---|
| 243 | + nfsd_reply_cache_free_locked(b, rp, nn); |
|---|
| 250 | 244 | freed++; |
|---|
| 251 | 245 | } |
|---|
| 252 | 246 | return freed; |
|---|
| .. | .. |
|---|
| 257 | 251 | * Also prune the oldest ones when the total exceeds the max number of entries. |
|---|
| 258 | 252 | */ |
|---|
| 259 | 253 | static long |
|---|
| 260 | | -prune_cache_entries(void) |
|---|
| 254 | +prune_cache_entries(struct nfsd_net *nn) |
|---|
| 261 | 255 | { |
|---|
| 262 | 256 | unsigned int i; |
|---|
| 263 | 257 | long freed = 0; |
|---|
| 264 | 258 | |
|---|
| 265 | | - for (i = 0; i < drc_hashsize; i++) { |
|---|
| 266 | | - struct nfsd_drc_bucket *b = &drc_hashtbl[i]; |
|---|
| 259 | + for (i = 0; i < nn->drc_hashsize; i++) { |
|---|
| 260 | + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i]; |
|---|
| 267 | 261 | |
|---|
| 268 | 262 | if (list_empty(&b->lru_head)) |
|---|
| 269 | 263 | continue; |
|---|
| 270 | 264 | spin_lock(&b->cache_lock); |
|---|
| 271 | | - freed += prune_bucket(b); |
|---|
| 265 | + freed += prune_bucket(b, nn); |
|---|
| 272 | 266 | spin_unlock(&b->cache_lock); |
|---|
| 273 | 267 | } |
|---|
| 274 | 268 | return freed; |
|---|
| .. | .. |
|---|
| 277 | 271 | static unsigned long |
|---|
| 278 | 272 | nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) |
|---|
| 279 | 273 | { |
|---|
| 280 | | - return atomic_read(&num_drc_entries); |
|---|
| 274 | + struct nfsd_net *nn = container_of(shrink, |
|---|
| 275 | + struct nfsd_net, nfsd_reply_cache_shrinker); |
|---|
| 276 | + |
|---|
| 277 | + return atomic_read(&nn->num_drc_entries); |
|---|
| 281 | 278 | } |
|---|
| 282 | 279 | |
|---|
| 283 | 280 | static unsigned long |
|---|
| 284 | 281 | nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) |
|---|
| 285 | 282 | { |
|---|
| 286 | | - return prune_cache_entries(); |
|---|
| 283 | + struct nfsd_net *nn = container_of(shrink, |
|---|
| 284 | + struct nfsd_net, nfsd_reply_cache_shrinker); |
|---|
| 285 | + |
|---|
| 286 | + return prune_cache_entries(nn); |
|---|
| 287 | 287 | } |
|---|
| 288 | 288 | /* |
|---|
| 289 | 289 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes |
|---|
| .. | .. |
|---|
| 318 | 318 | return csum; |
|---|
| 319 | 319 | } |
|---|
| 320 | 320 | |
|---|
| 321 | | -static bool |
|---|
| 322 | | -nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) |
|---|
| 321 | +static int |
|---|
| 322 | +nfsd_cache_key_cmp(const struct svc_cacherep *key, |
|---|
| 323 | + const struct svc_cacherep *rp, struct nfsd_net *nn) |
|---|
| 323 | 324 | { |
|---|
| 324 | | - /* Check RPC XID first */ |
|---|
| 325 | | - if (rqstp->rq_xid != rp->c_xid) |
|---|
| 326 | | - return false; |
|---|
| 327 | | - /* compare checksum of NFS data */ |
|---|
| 328 | | - if (csum != rp->c_csum) { |
|---|
| 329 | | - ++payload_misses; |
|---|
| 330 | | - return false; |
|---|
| 325 | + if (key->c_key.k_xid == rp->c_key.k_xid && |
|---|
| 326 | + key->c_key.k_csum != rp->c_key.k_csum) { |
|---|
| 327 | + ++nn->payload_misses; |
|---|
| 328 | + trace_nfsd_drc_mismatch(nn, key, rp); |
|---|
| 331 | 329 | } |
|---|
| 332 | 330 | |
|---|
| 333 | | - /* Other discriminators */ |
|---|
| 334 | | - if (rqstp->rq_proc != rp->c_proc || |
|---|
| 335 | | - rqstp->rq_prot != rp->c_prot || |
|---|
| 336 | | - rqstp->rq_vers != rp->c_vers || |
|---|
| 337 | | - rqstp->rq_arg.len != rp->c_len || |
|---|
| 338 | | - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || |
|---|
| 339 | | - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) |
|---|
| 340 | | - return false; |
|---|
| 341 | | - |
|---|
| 342 | | - return true; |
|---|
| 331 | + return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); |
|---|
| 343 | 332 | } |
|---|
| 344 | 333 | |
|---|
| 345 | 334 | /* |
|---|
| 346 | 335 | * Search the request hash for an entry that matches the given rqstp. |
|---|
| 347 | 336 | * Must be called with cache_lock held. Returns the found entry or |
|---|
| 348 | | - * NULL on failure. |
|---|
| 337 | + * inserts an empty key on failure. |
|---|
| 349 | 338 | */ |
|---|
| 350 | 339 | static struct svc_cacherep * |
|---|
| 351 | | -nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, |
|---|
| 352 | | - __wsum csum) |
|---|
| 340 | +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, |
|---|
| 341 | + struct nfsd_net *nn) |
|---|
| 353 | 342 | { |
|---|
| 354 | | - struct svc_cacherep *rp, *ret = NULL; |
|---|
| 355 | | - struct list_head *rh = &b->lru_head; |
|---|
| 343 | + struct svc_cacherep *rp, *ret = key; |
|---|
| 344 | + struct rb_node **p = &b->rb_head.rb_node, |
|---|
| 345 | + *parent = NULL; |
|---|
| 356 | 346 | unsigned int entries = 0; |
|---|
| 347 | + int cmp; |
|---|
| 357 | 348 | |
|---|
| 358 | | - list_for_each_entry(rp, rh, c_lru) { |
|---|
| 349 | + while (*p != NULL) { |
|---|
| 359 | 350 | ++entries; |
|---|
| 360 | | - if (nfsd_cache_match(rqstp, csum, rp)) { |
|---|
| 351 | + parent = *p; |
|---|
| 352 | + rp = rb_entry(parent, struct svc_cacherep, c_node); |
|---|
| 353 | + |
|---|
| 354 | + cmp = nfsd_cache_key_cmp(key, rp, nn); |
|---|
| 355 | + if (cmp < 0) |
|---|
| 356 | + p = &parent->rb_left; |
|---|
| 357 | + else if (cmp > 0) |
|---|
| 358 | + p = &parent->rb_right; |
|---|
| 359 | + else { |
|---|
| 361 | 360 | ret = rp; |
|---|
| 362 | | - break; |
|---|
| 361 | + goto out; |
|---|
| 363 | 362 | } |
|---|
| 364 | 363 | } |
|---|
| 365 | | - |
|---|
| 364 | + rb_link_node(&key->c_node, parent, p); |
|---|
| 365 | + rb_insert_color(&key->c_node, &b->rb_head); |
|---|
| 366 | +out: |
|---|
| 366 | 367 | /* tally hash chain length stats */ |
|---|
| 367 | | - if (entries > longest_chain) { |
|---|
| 368 | | - longest_chain = entries; |
|---|
| 369 | | - longest_chain_cachesize = atomic_read(&num_drc_entries); |
|---|
| 370 | | - } else if (entries == longest_chain) { |
|---|
| 368 | + if (entries > nn->longest_chain) { |
|---|
| 369 | + nn->longest_chain = entries; |
|---|
| 370 | + nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries); |
|---|
| 371 | + } else if (entries == nn->longest_chain) { |
|---|
| 371 | 372 | /* prefer to keep the smallest cachesize possible here */ |
|---|
| 372 | | - longest_chain_cachesize = min_t(unsigned int, |
|---|
| 373 | | - longest_chain_cachesize, |
|---|
| 374 | | - atomic_read(&num_drc_entries)); |
|---|
| 373 | + nn->longest_chain_cachesize = min_t(unsigned int, |
|---|
| 374 | + nn->longest_chain_cachesize, |
|---|
| 375 | + atomic_read(&nn->num_drc_entries)); |
|---|
| 375 | 376 | } |
|---|
| 376 | 377 | |
|---|
| 378 | + lru_put_end(b, ret); |
|---|
| 377 | 379 | return ret; |
|---|
| 378 | 380 | } |
|---|
| 379 | 381 | |
|---|
| 380 | | -/* |
|---|
| 382 | +/** |
|---|
| 383 | + * nfsd_cache_lookup - Find an entry in the duplicate reply cache |
|---|
| 384 | + * @rqstp: Incoming Call to find |
|---|
| 385 | + * |
|---|
| 381 | 386 | * Try to find an entry matching the current call in the cache. When none |
|---|
| 382 | 387 | * is found, we try to grab the oldest expired entry off the LRU list. If |
|---|
| 383 | 388 | * a suitable one isn't there, then drop the cache_lock and allocate a |
|---|
| 384 | 389 | * new one, then search again in case one got inserted while this thread |
|---|
| 385 | 390 | * didn't hold the lock. |
|---|
| 391 | + * |
|---|
| 392 | + * Return values: |
|---|
| 393 | + * %RC_DOIT: Process the request normally |
|---|
| 394 | + * %RC_REPLY: Reply from cache |
|---|
| 395 | + * %RC_DROPIT: Do not process the request further |
|---|
| 386 | 396 | */ |
|---|
| 387 | | -int |
|---|
| 388 | | -nfsd_cache_lookup(struct svc_rqst *rqstp) |
|---|
| 397 | +int nfsd_cache_lookup(struct svc_rqst *rqstp) |
|---|
| 389 | 398 | { |
|---|
| 399 | + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); |
|---|
| 390 | 400 | struct svc_cacherep *rp, *found; |
|---|
| 391 | 401 | __be32 xid = rqstp->rq_xid; |
|---|
| 392 | | - u32 proto = rqstp->rq_prot, |
|---|
| 393 | | - vers = rqstp->rq_vers, |
|---|
| 394 | | - proc = rqstp->rq_proc; |
|---|
| 395 | 402 | __wsum csum; |
|---|
| 396 | | - u32 hash = nfsd_cache_hash(xid); |
|---|
| 397 | | - struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; |
|---|
| 403 | + u32 hash = nfsd_cache_hash(xid, nn); |
|---|
| 404 | + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; |
|---|
| 398 | 405 | int type = rqstp->rq_cachetype; |
|---|
| 399 | 406 | int rtn = RC_DOIT; |
|---|
| 400 | 407 | |
|---|
| 401 | 408 | rqstp->rq_cacherep = NULL; |
|---|
| 402 | 409 | if (type == RC_NOCACHE) { |
|---|
| 403 | 410 | nfsdstats.rcnocache++; |
|---|
| 404 | | - return rtn; |
|---|
| 411 | + goto out; |
|---|
| 405 | 412 | } |
|---|
| 406 | 413 | |
|---|
| 407 | 414 | csum = nfsd_cache_csum(rqstp); |
|---|
| .. | .. |
|---|
| 410 | 417 | * Since the common case is a cache miss followed by an insert, |
|---|
| 411 | 418 | * preallocate an entry. |
|---|
| 412 | 419 | */ |
|---|
| 413 | | - rp = nfsd_reply_cache_alloc(); |
|---|
| 420 | + rp = nfsd_reply_cache_alloc(rqstp, csum, nn); |
|---|
| 421 | + if (!rp) |
|---|
| 422 | + goto out; |
|---|
| 423 | + |
|---|
| 414 | 424 | spin_lock(&b->cache_lock); |
|---|
| 415 | | - if (likely(rp)) { |
|---|
| 416 | | - atomic_inc(&num_drc_entries); |
|---|
| 417 | | - drc_mem_usage += sizeof(*rp); |
|---|
| 418 | | - } |
|---|
| 419 | | - |
|---|
| 420 | | - /* go ahead and prune the cache */ |
|---|
| 421 | | - prune_bucket(b); |
|---|
| 422 | | - |
|---|
| 423 | | - found = nfsd_cache_search(b, rqstp, csum); |
|---|
| 424 | | - if (found) { |
|---|
| 425 | | - if (likely(rp)) |
|---|
| 426 | | - nfsd_reply_cache_free_locked(rp); |
|---|
| 425 | + found = nfsd_cache_insert(b, rp, nn); |
|---|
| 426 | + if (found != rp) { |
|---|
| 427 | + nfsd_reply_cache_free_locked(NULL, rp, nn); |
|---|
| 427 | 428 | rp = found; |
|---|
| 428 | 429 | goto found_entry; |
|---|
| 429 | | - } |
|---|
| 430 | | - |
|---|
| 431 | | - if (!rp) { |
|---|
| 432 | | - dprintk("nfsd: unable to allocate DRC entry!\n"); |
|---|
| 433 | | - goto out; |
|---|
| 434 | 430 | } |
|---|
| 435 | 431 | |
|---|
| 436 | 432 | nfsdstats.rcmisses++; |
|---|
| 437 | 433 | rqstp->rq_cacherep = rp; |
|---|
| 438 | 434 | rp->c_state = RC_INPROG; |
|---|
| 439 | | - rp->c_xid = xid; |
|---|
| 440 | | - rp->c_proc = proc; |
|---|
| 441 | | - rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); |
|---|
| 442 | | - rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); |
|---|
| 443 | | - rp->c_prot = proto; |
|---|
| 444 | | - rp->c_vers = vers; |
|---|
| 445 | | - rp->c_len = rqstp->rq_arg.len; |
|---|
| 446 | | - rp->c_csum = csum; |
|---|
| 447 | 435 | |
|---|
| 448 | | - lru_put_end(b, rp); |
|---|
| 436 | + atomic_inc(&nn->num_drc_entries); |
|---|
| 437 | + nn->drc_mem_usage += sizeof(*rp); |
|---|
| 449 | 438 | |
|---|
| 450 | | - /* release any buffer */ |
|---|
| 451 | | - if (rp->c_type == RC_REPLBUFF) { |
|---|
| 452 | | - drc_mem_usage -= rp->c_replvec.iov_len; |
|---|
| 453 | | - kfree(rp->c_replvec.iov_base); |
|---|
| 454 | | - rp->c_replvec.iov_base = NULL; |
|---|
| 455 | | - } |
|---|
| 456 | | - rp->c_type = RC_NOCACHE; |
|---|
| 457 | | - out: |
|---|
| 439 | + /* go ahead and prune the cache */ |
|---|
| 440 | + prune_bucket(b, nn); |
|---|
| 441 | + |
|---|
| 442 | +out_unlock: |
|---|
| 458 | 443 | spin_unlock(&b->cache_lock); |
|---|
| 444 | +out: |
|---|
| 459 | 445 | return rtn; |
|---|
| 460 | 446 | |
|---|
| 461 | 447 | found_entry: |
|---|
| 462 | | - nfsdstats.rchits++; |
|---|
| 463 | 448 | /* We found a matching entry which is either in progress or done. */ |
|---|
| 464 | | - lru_put_end(b, rp); |
|---|
| 465 | | - |
|---|
| 449 | + nfsdstats.rchits++; |
|---|
| 466 | 450 | rtn = RC_DROPIT; |
|---|
| 451 | + |
|---|
| 467 | 452 | /* Request being processed */ |
|---|
| 468 | 453 | if (rp->c_state == RC_INPROG) |
|---|
| 469 | | - goto out; |
|---|
| 454 | + goto out_trace; |
|---|
| 470 | 455 | |
|---|
| 471 | 456 | /* From the hall of fame of impractical attacks: |
|---|
| 472 | 457 | * Is this a user who tries to snoop on the cache? */ |
|---|
| 473 | 458 | rtn = RC_DOIT; |
|---|
| 474 | 459 | if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure) |
|---|
| 475 | | - goto out; |
|---|
| 460 | + goto out_trace; |
|---|
| 476 | 461 | |
|---|
| 477 | 462 | /* Compose RPC reply header */ |
|---|
| 478 | 463 | switch (rp->c_type) { |
|---|
| .. | .. |
|---|
| 484 | 469 | break; |
|---|
| 485 | 470 | case RC_REPLBUFF: |
|---|
| 486 | 471 | if (!nfsd_cache_append(rqstp, &rp->c_replvec)) |
|---|
| 487 | | - goto out; /* should not happen */ |
|---|
| 472 | + goto out_unlock; /* should not happen */ |
|---|
| 488 | 473 | rtn = RC_REPLY; |
|---|
| 489 | 474 | break; |
|---|
| 490 | 475 | default: |
|---|
| 491 | | - printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); |
|---|
| 492 | | - nfsd_reply_cache_free_locked(rp); |
|---|
| 476 | + WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type); |
|---|
| 493 | 477 | } |
|---|
| 494 | 478 | |
|---|
| 495 | | - goto out; |
|---|
| 479 | +out_trace: |
|---|
| 480 | + trace_nfsd_drc_found(nn, rqstp, rtn); |
|---|
| 481 | + goto out_unlock; |
|---|
| 496 | 482 | } |
|---|
| 497 | 483 | |
|---|
| 498 | | -/* |
|---|
| 499 | | - * Update a cache entry. This is called from nfsd_dispatch when |
|---|
| 500 | | - * the procedure has been executed and the complete reply is in |
|---|
| 501 | | - * rqstp->rq_res. |
|---|
| 484 | +/** |
|---|
| 485 | + * nfsd_cache_update - Update an entry in the duplicate reply cache. |
|---|
| 486 | + * @rqstp: svc_rqst with a finished Reply |
|---|
| 487 | + * @cachetype: which cache to update |
|---|
| 488 | + * @statp: Reply's status code |
|---|
| 489 | + * |
|---|
| 490 | + * This is called from nfsd_dispatch when the procedure has been |
|---|
| 491 | + * executed and the complete reply is in rqstp->rq_res. |
|---|
| 502 | 492 | * |
|---|
| 503 | 493 | * We're copying around data here rather than swapping buffers because |
|---|
| 504 | 494 | * the toplevel loop requires max-sized buffers, which would be a waste |
|---|
| .. | .. |
|---|
| 511 | 501 | * nfsd failed to encode a reply that otherwise would have been cached. |
|---|
| 512 | 502 | * In this case, nfsd_cache_update is called with statp == NULL. |
|---|
| 513 | 503 | */ |
|---|
| 514 | | -void |
|---|
| 515 | | -nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) |
|---|
| 504 | +void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) |
|---|
| 516 | 505 | { |
|---|
| 506 | + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); |
|---|
| 517 | 507 | struct svc_cacherep *rp = rqstp->rq_cacherep; |
|---|
| 518 | 508 | struct kvec *resv = &rqstp->rq_res.head[0], *cachv; |
|---|
| 519 | 509 | u32 hash; |
|---|
| .. | .. |
|---|
| 524 | 514 | if (!rp) |
|---|
| 525 | 515 | return; |
|---|
| 526 | 516 | |
|---|
| 527 | | - hash = nfsd_cache_hash(rp->c_xid); |
|---|
| 528 | | - b = &drc_hashtbl[hash]; |
|---|
| 517 | + hash = nfsd_cache_hash(rp->c_key.k_xid, nn); |
|---|
| 518 | + b = &nn->drc_hashtbl[hash]; |
|---|
| 529 | 519 | |
|---|
| 530 | 520 | len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); |
|---|
| 531 | 521 | len >>= 2; |
|---|
| 532 | 522 | |
|---|
| 533 | 523 | /* Don't cache excessive amounts of data and XDR failures */ |
|---|
| 534 | 524 | if (!statp || len > (256 >> 2)) { |
|---|
| 535 | | - nfsd_reply_cache_free(b, rp); |
|---|
| 525 | + nfsd_reply_cache_free(b, rp, nn); |
|---|
| 536 | 526 | return; |
|---|
| 537 | 527 | } |
|---|
| 538 | 528 | |
|---|
| .. | .. |
|---|
| 547 | 537 | bufsize = len << 2; |
|---|
| 548 | 538 | cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); |
|---|
| 549 | 539 | if (!cachv->iov_base) { |
|---|
| 550 | | - nfsd_reply_cache_free(b, rp); |
|---|
| 540 | + nfsd_reply_cache_free(b, rp, nn); |
|---|
| 551 | 541 | return; |
|---|
| 552 | 542 | } |
|---|
| 553 | 543 | cachv->iov_len = bufsize; |
|---|
| 554 | 544 | memcpy(cachv->iov_base, statp, bufsize); |
|---|
| 555 | 545 | break; |
|---|
| 556 | 546 | case RC_NOCACHE: |
|---|
| 557 | | - nfsd_reply_cache_free(b, rp); |
|---|
| 547 | + nfsd_reply_cache_free(b, rp, nn); |
|---|
| 558 | 548 | return; |
|---|
| 559 | 549 | } |
|---|
| 560 | 550 | spin_lock(&b->cache_lock); |
|---|
| 561 | | - drc_mem_usage += bufsize; |
|---|
| 551 | + nn->drc_mem_usage += bufsize; |
|---|
| 562 | 552 | lru_put_end(b, rp); |
|---|
| 563 | 553 | rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); |
|---|
| 564 | 554 | rp->c_type = cachetype; |
|---|
| .. | .. |
|---|
| 594 | 584 | */ |
|---|
| 595 | 585 | static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) |
|---|
| 596 | 586 | { |
|---|
| 597 | | - seq_printf(m, "max entries: %u\n", max_drc_entries); |
|---|
| 587 | + struct nfsd_net *nn = m->private; |
|---|
| 588 | + |
|---|
| 589 | + seq_printf(m, "max entries: %u\n", nn->max_drc_entries); |
|---|
| 598 | 590 | seq_printf(m, "num entries: %u\n", |
|---|
| 599 | | - atomic_read(&num_drc_entries)); |
|---|
| 600 | | - seq_printf(m, "hash buckets: %u\n", 1 << maskbits); |
|---|
| 601 | | - seq_printf(m, "mem usage: %u\n", drc_mem_usage); |
|---|
| 591 | + atomic_read(&nn->num_drc_entries)); |
|---|
| 592 | + seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); |
|---|
| 593 | + seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); |
|---|
| 602 | 594 | seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); |
|---|
| 603 | 595 | seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); |
|---|
| 604 | 596 | seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); |
|---|
| 605 | | - seq_printf(m, "payload misses: %u\n", payload_misses); |
|---|
| 606 | | - seq_printf(m, "longest chain len: %u\n", longest_chain); |
|---|
| 607 | | - seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); |
|---|
| 597 | + seq_printf(m, "payload misses: %u\n", nn->payload_misses); |
|---|
| 598 | + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); |
|---|
| 599 | + seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); |
|---|
| 608 | 600 | return 0; |
|---|
| 609 | 601 | } |
|---|
| 610 | 602 | |
|---|
| 611 | 603 | int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) |
|---|
| 612 | 604 | { |
|---|
| 613 | | - return single_open(file, nfsd_reply_cache_stats_show, NULL); |
|---|
| 605 | + struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, |
|---|
| 606 | + nfsd_net_id); |
|---|
| 607 | + |
|---|
| 608 | + return single_open(file, nfsd_reply_cache_stats_show, nn); |
|---|
| 614 | 609 | } |
|---|