| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (c) 2007-2014 Nicira, Inc. |
|---|
| 3 | | - * |
|---|
| 4 | | - * This program is free software; you can redistribute it and/or |
|---|
| 5 | | - * modify it under the terms of version 2 of the GNU General Public |
|---|
| 6 | | - * License as published by the Free Software Foundation. |
|---|
| 7 | | - * |
|---|
| 8 | | - * This program is distributed in the hope that it will be useful, but |
|---|
| 9 | | - * WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 11 | | - * General Public License for more details. |
|---|
| 12 | | - * |
|---|
| 13 | | - * You should have received a copy of the GNU General Public License |
|---|
| 14 | | - * along with this program; if not, write to the Free Software |
|---|
| 15 | | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|---|
| 16 | | - * 02110-1301, USA |
|---|
| 17 | 4 | */ |
|---|
| 18 | 5 | |
|---|
| 19 | 6 | #include "flow.h" |
|---|
| .. | .. |
|---|
| 42 | 29 | #include <linux/icmp.h> |
|---|
| 43 | 30 | #include <linux/icmpv6.h> |
|---|
| 44 | 31 | #include <linux/rculist.h> |
|---|
| 32 | +#include <linux/sort.h> |
|---|
| 45 | 33 | #include <net/ip.h> |
|---|
| 46 | 34 | #include <net/ipv6.h> |
|---|
| 47 | 35 | #include <net/ndisc.h> |
|---|
| 48 | 36 | |
|---|
| 49 | 37 | #define TBL_MIN_BUCKETS 1024 |
|---|
| 38 | +#define MASK_ARRAY_SIZE_MIN 16 |
|---|
| 50 | 39 | #define REHASH_INTERVAL (10 * 60 * HZ) |
|---|
| 40 | + |
|---|
| 41 | +#define MC_DEFAULT_HASH_ENTRIES 256 |
|---|
| 42 | +#define MC_HASH_SHIFT 8 |
|---|
| 43 | +#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) |
|---|
| 51 | 44 | |
|---|
| 52 | 45 | static struct kmem_cache *flow_cache; |
|---|
| 53 | 46 | struct kmem_cache *flow_stats_cache __read_mostly; |
|---|
| .. | .. |
|---|
| 79 | 72 | struct sw_flow *ovs_flow_alloc(void) |
|---|
| 80 | 73 | { |
|---|
| 81 | 74 | struct sw_flow *flow; |
|---|
| 82 | | - struct flow_stats *stats; |
|---|
| 75 | + struct sw_flow_stats *stats; |
|---|
| 83 | 76 | |
|---|
| 84 | 77 | flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); |
|---|
| 85 | 78 | if (!flow) |
|---|
| .. | .. |
|---|
| 111 | 104 | return table->count; |
|---|
| 112 | 105 | } |
|---|
| 113 | 106 | |
|---|
| 114 | | -static struct flex_array *alloc_buckets(unsigned int n_buckets) |
|---|
| 115 | | -{ |
|---|
| 116 | | - struct flex_array *buckets; |
|---|
| 117 | | - int i, err; |
|---|
| 118 | | - |
|---|
| 119 | | - buckets = flex_array_alloc(sizeof(struct hlist_head), |
|---|
| 120 | | - n_buckets, GFP_KERNEL); |
|---|
| 121 | | - if (!buckets) |
|---|
| 122 | | - return NULL; |
|---|
| 123 | | - |
|---|
| 124 | | - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); |
|---|
| 125 | | - if (err) { |
|---|
| 126 | | - flex_array_free(buckets); |
|---|
| 127 | | - return NULL; |
|---|
| 128 | | - } |
|---|
| 129 | | - |
|---|
| 130 | | - for (i = 0; i < n_buckets; i++) |
|---|
| 131 | | - INIT_HLIST_HEAD((struct hlist_head *) |
|---|
| 132 | | - flex_array_get(buckets, i)); |
|---|
| 133 | | - |
|---|
| 134 | | - return buckets; |
|---|
| 135 | | -} |
|---|
| 136 | | - |
|---|
| 137 | 107 | static void flow_free(struct sw_flow *flow) |
|---|
| 138 | 108 | { |
|---|
| 139 | 109 | int cpu; |
|---|
| .. | .. |
|---|
| 141 | 111 | if (ovs_identifier_is_key(&flow->id)) |
|---|
| 142 | 112 | kfree(flow->id.unmasked_key); |
|---|
| 143 | 113 | if (flow->sf_acts) |
|---|
| 144 | | - ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); |
|---|
| 114 | + ovs_nla_free_flow_actions((struct sw_flow_actions __force *) |
|---|
| 115 | + flow->sf_acts); |
|---|
| 145 | 116 | /* We open code this to make sure cpu 0 is always considered */ |
|---|
| 146 | | - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) |
|---|
| 117 | + for (cpu = 0; cpu < nr_cpu_ids; |
|---|
| 118 | + cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { |
|---|
| 147 | 119 | if (flow->stats[cpu]) |
|---|
| 148 | 120 | kmem_cache_free(flow_stats_cache, |
|---|
| 149 | | - (struct flow_stats __force *)flow->stats[cpu]); |
|---|
| 121 | + (struct sw_flow_stats __force *)flow->stats[cpu]); |
|---|
| 122 | + } |
|---|
| 123 | + |
|---|
| 150 | 124 | kmem_cache_free(flow_cache, flow); |
|---|
| 151 | 125 | } |
|---|
| 152 | 126 | |
|---|
| .. | .. |
|---|
| 168 | 142 | flow_free(flow); |
|---|
| 169 | 143 | } |
|---|
| 170 | 144 | |
|---|
| 171 | | -static void free_buckets(struct flex_array *buckets) |
|---|
| 172 | | -{ |
|---|
| 173 | | - flex_array_free(buckets); |
|---|
| 174 | | -} |
|---|
| 175 | | - |
|---|
| 176 | | - |
|---|
| 177 | 145 | static void __table_instance_destroy(struct table_instance *ti) |
|---|
| 178 | 146 | { |
|---|
| 179 | | - free_buckets(ti->buckets); |
|---|
| 147 | + kvfree(ti->buckets); |
|---|
| 180 | 148 | kfree(ti); |
|---|
| 181 | 149 | } |
|---|
| 182 | 150 | |
|---|
| 183 | 151 | static struct table_instance *table_instance_alloc(int new_size) |
|---|
| 184 | 152 | { |
|---|
| 185 | 153 | struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); |
|---|
| 154 | + int i; |
|---|
| 186 | 155 | |
|---|
| 187 | 156 | if (!ti) |
|---|
| 188 | 157 | return NULL; |
|---|
| 189 | 158 | |
|---|
| 190 | | - ti->buckets = alloc_buckets(new_size); |
|---|
| 191 | | - |
|---|
| 159 | + ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head), |
|---|
| 160 | + GFP_KERNEL); |
|---|
| 192 | 161 | if (!ti->buckets) { |
|---|
| 193 | 162 | kfree(ti); |
|---|
| 194 | 163 | return NULL; |
|---|
| 195 | 164 | } |
|---|
| 165 | + |
|---|
| 166 | + for (i = 0; i < new_size; i++) |
|---|
| 167 | + INIT_HLIST_HEAD(&ti->buckets[i]); |
|---|
| 168 | + |
|---|
| 196 | 169 | ti->n_buckets = new_size; |
|---|
| 197 | 170 | ti->node_ver = 0; |
|---|
| 198 | | - ti->keep_flows = false; |
|---|
| 199 | 171 | get_random_bytes(&ti->hash_seed, sizeof(u32)); |
|---|
| 200 | 172 | |
|---|
| 201 | 173 | return ti; |
|---|
| 202 | 174 | } |
|---|
| 203 | 175 | |
|---|
| 176 | +static void __mask_array_destroy(struct mask_array *ma) |
|---|
| 177 | +{ |
|---|
| 178 | + free_percpu(ma->masks_usage_stats); |
|---|
| 179 | + kfree(ma); |
|---|
| 180 | +} |
|---|
| 181 | + |
|---|
| 182 | +static void mask_array_rcu_cb(struct rcu_head *rcu) |
|---|
| 183 | +{ |
|---|
| 184 | + struct mask_array *ma = container_of(rcu, struct mask_array, rcu); |
|---|
| 185 | + |
|---|
| 186 | + __mask_array_destroy(ma); |
|---|
| 187 | +} |
|---|
| 188 | + |
|---|
| 189 | +static void tbl_mask_array_reset_counters(struct mask_array *ma) |
|---|
| 190 | +{ |
|---|
| 191 | + int i, cpu; |
|---|
| 192 | + |
|---|
| 193 | + /* As the per-CPU counters are not atomic, we cannot reset them |
|---|
| 194 | + * from another CPU. To still keep an approximately zero-based |
|---|
| 195 | + * counter, we store the value at reset time and subtract it |
|---|
| 196 | + * later when processing. |
|---|
| 197 | + */ |
|---|
| 198 | + for (i = 0; i < ma->max; i++) { |
|---|
| 199 | + ma->masks_usage_zero_cntr[i] = 0; |
|---|
| 200 | + |
|---|
| 201 | + for_each_possible_cpu(cpu) { |
|---|
| 202 | + struct mask_array_stats *stats; |
|---|
| 203 | + unsigned int start; |
|---|
| 204 | + u64 counter; |
|---|
| 205 | + |
|---|
| 206 | + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); |
|---|
| 207 | + do { |
|---|
| 208 | + start = u64_stats_fetch_begin_irq(&stats->syncp); |
|---|
| 209 | + counter = stats->usage_cntrs[i]; |
|---|
| 210 | + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); |
|---|
| 211 | + |
|---|
| 212 | + ma->masks_usage_zero_cntr[i] += counter; |
|---|
| 213 | + } |
|---|
| 214 | + } |
|---|
| 215 | +} |
|---|
| 216 | + |
|---|
| 217 | +static struct mask_array *tbl_mask_array_alloc(int size) |
|---|
| 218 | +{ |
|---|
| 219 | + struct mask_array *new; |
|---|
| 220 | + |
|---|
| 221 | + size = max(MASK_ARRAY_SIZE_MIN, size); |
|---|
| 222 | + new = kzalloc(sizeof(struct mask_array) + |
|---|
| 223 | + sizeof(struct sw_flow_mask *) * size + |
|---|
| 224 | + sizeof(u64) * size, GFP_KERNEL); |
|---|
| 225 | + if (!new) |
|---|
| 226 | + return NULL; |
|---|
| 227 | + |
|---|
| 228 | + new->masks_usage_zero_cntr = (u64 *)((u8 *)new + |
|---|
| 229 | + sizeof(struct mask_array) + |
|---|
| 230 | + sizeof(struct sw_flow_mask *) * |
|---|
| 231 | + size); |
|---|
| 232 | + |
|---|
| 233 | + new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + |
|---|
| 234 | + sizeof(u64) * size, |
|---|
| 235 | + __alignof__(u64)); |
|---|
| 236 | + if (!new->masks_usage_stats) { |
|---|
| 237 | + kfree(new); |
|---|
| 238 | + return NULL; |
|---|
| 239 | + } |
|---|
| 240 | + |
|---|
| 241 | + new->count = 0; |
|---|
| 242 | + new->max = size; |
|---|
| 243 | + |
|---|
| 244 | + return new; |
|---|
| 245 | +} |
|---|
| 246 | + |
|---|
| 247 | +static int tbl_mask_array_realloc(struct flow_table *tbl, int size) |
|---|
| 248 | +{ |
|---|
| 249 | + struct mask_array *old; |
|---|
| 250 | + struct mask_array *new; |
|---|
| 251 | + |
|---|
| 252 | + new = tbl_mask_array_alloc(size); |
|---|
| 253 | + if (!new) |
|---|
| 254 | + return -ENOMEM; |
|---|
| 255 | + |
|---|
| 256 | + old = ovsl_dereference(tbl->mask_array); |
|---|
| 257 | + if (old) { |
|---|
| 258 | + int i; |
|---|
| 259 | + |
|---|
| 260 | + for (i = 0; i < old->max; i++) { |
|---|
| 261 | + if (ovsl_dereference(old->masks[i])) |
|---|
| 262 | + new->masks[new->count++] = old->masks[i]; |
|---|
| 263 | + } |
|---|
| 264 | + call_rcu(&old->rcu, mask_array_rcu_cb); |
|---|
| 265 | + } |
|---|
| 266 | + |
|---|
| 267 | + rcu_assign_pointer(tbl->mask_array, new); |
|---|
| 268 | + |
|---|
| 269 | + return 0; |
|---|
| 270 | +} |
|---|
| 271 | + |
|---|
| 272 | +static int tbl_mask_array_add_mask(struct flow_table *tbl, |
|---|
| 273 | + struct sw_flow_mask *new) |
|---|
| 274 | +{ |
|---|
| 275 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array); |
|---|
| 276 | + int err, ma_count = READ_ONCE(ma->count); |
|---|
| 277 | + |
|---|
| 278 | + if (ma_count >= ma->max) { |
|---|
| 279 | + err = tbl_mask_array_realloc(tbl, ma->max + |
|---|
| 280 | + MASK_ARRAY_SIZE_MIN); |
|---|
| 281 | + if (err) |
|---|
| 282 | + return err; |
|---|
| 283 | + |
|---|
| 284 | + ma = ovsl_dereference(tbl->mask_array); |
|---|
| 285 | + } else { |
|---|
| 286 | + /* On every add or delete we need to reset the counters so |
|---|
| 287 | + * every new mask gets a fair chance of being prioritized. |
|---|
| 288 | + */ |
|---|
| 289 | + tbl_mask_array_reset_counters(ma); |
|---|
| 290 | + } |
|---|
| 291 | + |
|---|
| 292 | + BUG_ON(ovsl_dereference(ma->masks[ma_count])); |
|---|
| 293 | + |
|---|
| 294 | + rcu_assign_pointer(ma->masks[ma_count], new); |
|---|
| 295 | + WRITE_ONCE(ma->count, ma_count + 1); |
|---|
| 296 | + |
|---|
| 297 | + return 0; |
|---|
| 298 | +} |
|---|
| 299 | + |
|---|
| 300 | +static void tbl_mask_array_del_mask(struct flow_table *tbl, |
|---|
| 301 | + struct sw_flow_mask *mask) |
|---|
| 302 | +{ |
|---|
| 303 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array); |
|---|
| 304 | + int i, ma_count = READ_ONCE(ma->count); |
|---|
| 305 | + |
|---|
| 306 | + /* Remove the deleted mask pointers from the array */ |
|---|
| 307 | + for (i = 0; i < ma_count; i++) { |
|---|
| 308 | + if (mask == ovsl_dereference(ma->masks[i])) |
|---|
| 309 | + goto found; |
|---|
| 310 | + } |
|---|
| 311 | + |
|---|
| 312 | + BUG(); |
|---|
| 313 | + return; |
|---|
| 314 | + |
|---|
| 315 | +found: |
|---|
| 316 | + WRITE_ONCE(ma->count, ma_count - 1); |
|---|
| 317 | + |
|---|
| 318 | + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]); |
|---|
| 319 | + RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL); |
|---|
| 320 | + |
|---|
| 321 | + kfree_rcu(mask, rcu); |
|---|
| 322 | + |
|---|
| 323 | + /* Shrink the mask array if necessary. */ |
|---|
| 324 | + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && |
|---|
| 325 | + ma_count <= (ma->max / 3)) |
|---|
| 326 | + tbl_mask_array_realloc(tbl, ma->max / 2); |
|---|
| 327 | + else |
|---|
| 328 | + tbl_mask_array_reset_counters(ma); |
|---|
| 329 | + |
|---|
| 330 | +} |
|---|
| 331 | + |
|---|
| 332 | +/* Remove 'mask' from the mask list, if it is not needed any more. */ |
|---|
| 333 | +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) |
|---|
| 334 | +{ |
|---|
| 335 | + if (mask) { |
|---|
| 336 | + /* ovs-lock is required to protect mask-refcount and |
|---|
| 337 | + * mask list. |
|---|
| 338 | + */ |
|---|
| 339 | + ASSERT_OVSL(); |
|---|
| 340 | + BUG_ON(!mask->ref_count); |
|---|
| 341 | + mask->ref_count--; |
|---|
| 342 | + |
|---|
| 343 | + if (!mask->ref_count) |
|---|
| 344 | + tbl_mask_array_del_mask(tbl, mask); |
|---|
| 345 | + } |
|---|
| 346 | +} |
|---|
| 347 | + |
|---|
| 348 | +static void __mask_cache_destroy(struct mask_cache *mc) |
|---|
| 349 | +{ |
|---|
| 350 | + free_percpu(mc->mask_cache); |
|---|
| 351 | + kfree(mc); |
|---|
| 352 | +} |
|---|
| 353 | + |
|---|
| 354 | +static void mask_cache_rcu_cb(struct rcu_head *rcu) |
|---|
| 355 | +{ |
|---|
| 356 | + struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu); |
|---|
| 357 | + |
|---|
| 358 | + __mask_cache_destroy(mc); |
|---|
| 359 | +} |
|---|
| 360 | + |
|---|
| 361 | +static struct mask_cache *tbl_mask_cache_alloc(u32 size) |
|---|
| 362 | +{ |
|---|
| 363 | + struct mask_cache_entry __percpu *cache = NULL; |
|---|
| 364 | + struct mask_cache *new; |
|---|
| 365 | + |
|---|
| 366 | + /* Only allow size to be 0, or a power of 2, and does not exceed |
|---|
| 367 | + * percpu allocation size. |
|---|
| 368 | + */ |
|---|
| 369 | + if ((!is_power_of_2(size) && size != 0) || |
|---|
| 370 | + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) |
|---|
| 371 | + return NULL; |
|---|
| 372 | + |
|---|
| 373 | + new = kzalloc(sizeof(*new), GFP_KERNEL); |
|---|
| 374 | + if (!new) |
|---|
| 375 | + return NULL; |
|---|
| 376 | + |
|---|
| 377 | + new->cache_size = size; |
|---|
| 378 | + if (new->cache_size > 0) { |
|---|
| 379 | + cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry), |
|---|
| 380 | + new->cache_size), |
|---|
| 381 | + __alignof__(struct mask_cache_entry)); |
|---|
| 382 | + if (!cache) { |
|---|
| 383 | + kfree(new); |
|---|
| 384 | + return NULL; |
|---|
| 385 | + } |
|---|
| 386 | + } |
|---|
| 387 | + |
|---|
| 388 | + new->mask_cache = cache; |
|---|
| 389 | + return new; |
|---|
| 390 | +} |
|---|
| 391 | +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) |
|---|
| 392 | +{ |
|---|
| 393 | + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); |
|---|
| 394 | + struct mask_cache *new; |
|---|
| 395 | + |
|---|
| 396 | + if (size == mc->cache_size) |
|---|
| 397 | + return 0; |
|---|
| 398 | + |
|---|
| 399 | + if ((!is_power_of_2(size) && size != 0) || |
|---|
| 400 | + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) |
|---|
| 401 | + return -EINVAL; |
|---|
| 402 | + |
|---|
| 403 | + new = tbl_mask_cache_alloc(size); |
|---|
| 404 | + if (!new) |
|---|
| 405 | + return -ENOMEM; |
|---|
| 406 | + |
|---|
| 407 | + rcu_assign_pointer(table->mask_cache, new); |
|---|
| 408 | + call_rcu(&mc->rcu, mask_cache_rcu_cb); |
|---|
| 409 | + |
|---|
| 410 | + return 0; |
|---|
| 411 | +} |
|---|
| 412 | + |
|---|
| 204 | 413 | int ovs_flow_tbl_init(struct flow_table *table) |
|---|
| 205 | 414 | { |
|---|
| 206 | 415 | struct table_instance *ti, *ufid_ti; |
|---|
| 416 | + struct mask_cache *mc; |
|---|
| 417 | + struct mask_array *ma; |
|---|
| 418 | + |
|---|
| 419 | + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES); |
|---|
| 420 | + if (!mc) |
|---|
| 421 | + return -ENOMEM; |
|---|
| 422 | + |
|---|
| 423 | + ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); |
|---|
| 424 | + if (!ma) |
|---|
| 425 | + goto free_mask_cache; |
|---|
| 207 | 426 | |
|---|
| 208 | 427 | ti = table_instance_alloc(TBL_MIN_BUCKETS); |
|---|
| 209 | | - |
|---|
| 210 | 428 | if (!ti) |
|---|
| 211 | | - return -ENOMEM; |
|---|
| 429 | + goto free_mask_array; |
|---|
| 212 | 430 | |
|---|
| 213 | 431 | ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); |
|---|
| 214 | 432 | if (!ufid_ti) |
|---|
| .. | .. |
|---|
| 216 | 434 | |
|---|
| 217 | 435 | rcu_assign_pointer(table->ti, ti); |
|---|
| 218 | 436 | rcu_assign_pointer(table->ufid_ti, ufid_ti); |
|---|
| 219 | | - INIT_LIST_HEAD(&table->mask_list); |
|---|
| 437 | + rcu_assign_pointer(table->mask_array, ma); |
|---|
| 438 | + rcu_assign_pointer(table->mask_cache, mc); |
|---|
| 220 | 439 | table->last_rehash = jiffies; |
|---|
| 221 | 440 | table->count = 0; |
|---|
| 222 | 441 | table->ufid_count = 0; |
|---|
| .. | .. |
|---|
| 224 | 443 | |
|---|
| 225 | 444 | free_ti: |
|---|
| 226 | 445 | __table_instance_destroy(ti); |
|---|
| 446 | +free_mask_array: |
|---|
| 447 | + __mask_array_destroy(ma); |
|---|
| 448 | +free_mask_cache: |
|---|
| 449 | + __mask_cache_destroy(mc); |
|---|
| 227 | 450 | return -ENOMEM; |
|---|
| 228 | 451 | } |
|---|
| 229 | 452 | |
|---|
| 230 | 453 | static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) |
|---|
| 231 | 454 | { |
|---|
| 232 | | - struct table_instance *ti = container_of(rcu, struct table_instance, rcu); |
|---|
| 455 | + struct table_instance *ti; |
|---|
| 233 | 456 | |
|---|
| 457 | + ti = container_of(rcu, struct table_instance, rcu); |
|---|
| 234 | 458 | __table_instance_destroy(ti); |
|---|
| 235 | 459 | } |
|---|
| 236 | 460 | |
|---|
| 237 | | -static void table_instance_destroy(struct table_instance *ti, |
|---|
| 238 | | - struct table_instance *ufid_ti, |
|---|
| 239 | | - bool deferred) |
|---|
| 461 | +static void table_instance_flow_free(struct flow_table *table, |
|---|
| 462 | + struct table_instance *ti, |
|---|
| 463 | + struct table_instance *ufid_ti, |
|---|
| 464 | + struct sw_flow *flow) |
|---|
| 465 | +{ |
|---|
| 466 | + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); |
|---|
| 467 | + table->count--; |
|---|
| 468 | + |
|---|
| 469 | + if (ovs_identifier_is_ufid(&flow->id)) { |
|---|
| 470 | + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); |
|---|
| 471 | + table->ufid_count--; |
|---|
| 472 | + } |
|---|
| 473 | + |
|---|
| 474 | + flow_mask_remove(table, flow->mask); |
|---|
| 475 | +} |
|---|
| 476 | + |
|---|
| 477 | +/* Must be called with OVS mutex held. */ |
|---|
| 478 | +void table_instance_flow_flush(struct flow_table *table, |
|---|
| 479 | + struct table_instance *ti, |
|---|
| 480 | + struct table_instance *ufid_ti) |
|---|
| 240 | 481 | { |
|---|
| 241 | 482 | int i; |
|---|
| 242 | 483 | |
|---|
| 243 | | - if (!ti) |
|---|
| 244 | | - return; |
|---|
| 245 | | - |
|---|
| 246 | | - BUG_ON(!ufid_ti); |
|---|
| 247 | | - if (ti->keep_flows) |
|---|
| 248 | | - goto skip_flows; |
|---|
| 249 | | - |
|---|
| 250 | 484 | for (i = 0; i < ti->n_buckets; i++) { |
|---|
| 251 | | - struct sw_flow *flow; |
|---|
| 252 | | - struct hlist_head *head = flex_array_get(ti->buckets, i); |
|---|
| 485 | + struct hlist_head *head = &ti->buckets[i]; |
|---|
| 253 | 486 | struct hlist_node *n; |
|---|
| 254 | | - int ver = ti->node_ver; |
|---|
| 255 | | - int ufid_ver = ufid_ti->node_ver; |
|---|
| 487 | + struct sw_flow *flow; |
|---|
| 256 | 488 | |
|---|
| 257 | | - hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { |
|---|
| 258 | | - hlist_del_rcu(&flow->flow_table.node[ver]); |
|---|
| 259 | | - if (ovs_identifier_is_ufid(&flow->id)) |
|---|
| 260 | | - hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); |
|---|
| 261 | | - ovs_flow_free(flow, deferred); |
|---|
| 489 | + hlist_for_each_entry_safe(flow, n, head, |
|---|
| 490 | + flow_table.node[ti->node_ver]) { |
|---|
| 491 | + |
|---|
| 492 | + table_instance_flow_free(table, ti, ufid_ti, |
|---|
| 493 | + flow); |
|---|
| 494 | + ovs_flow_free(flow, true); |
|---|
| 262 | 495 | } |
|---|
| 263 | 496 | } |
|---|
| 264 | 497 | |
|---|
| 265 | | -skip_flows: |
|---|
| 266 | | - if (deferred) { |
|---|
| 267 | | - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); |
|---|
| 268 | | - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); |
|---|
| 269 | | - } else { |
|---|
| 270 | | - __table_instance_destroy(ti); |
|---|
| 271 | | - __table_instance_destroy(ufid_ti); |
|---|
| 498 | + if (WARN_ON(table->count != 0 || |
|---|
| 499 | + table->ufid_count != 0)) { |
|---|
| 500 | + table->count = 0; |
|---|
| 501 | + table->ufid_count = 0; |
|---|
| 272 | 502 | } |
|---|
| 503 | +} |
|---|
| 504 | + |
|---|
| 505 | +static void table_instance_destroy(struct table_instance *ti, |
|---|
| 506 | + struct table_instance *ufid_ti) |
|---|
| 507 | +{ |
|---|
| 508 | + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); |
|---|
| 509 | + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); |
|---|
| 273 | 510 | } |
|---|
| 274 | 511 | |
|---|
| 275 | 512 | /* No need for locking this function is called from RCU callback or |
|---|
| .. | .. |
|---|
| 279 | 516 | { |
|---|
| 280 | 517 | struct table_instance *ti = rcu_dereference_raw(table->ti); |
|---|
| 281 | 518 | struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); |
|---|
| 519 | + struct mask_cache *mc = rcu_dereference_raw(table->mask_cache); |
|---|
| 520 | + struct mask_array *ma = rcu_dereference_raw(table->mask_array); |
|---|
| 282 | 521 | |
|---|
| 283 | | - table_instance_destroy(ti, ufid_ti, false); |
|---|
| 522 | + call_rcu(&mc->rcu, mask_cache_rcu_cb); |
|---|
| 523 | + call_rcu(&ma->rcu, mask_array_rcu_cb); |
|---|
| 524 | + table_instance_destroy(ti, ufid_ti); |
|---|
| 284 | 525 | } |
|---|
| 285 | 526 | |
|---|
| 286 | 527 | struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, |
|---|
| .. | .. |
|---|
| 294 | 535 | ver = ti->node_ver; |
|---|
| 295 | 536 | while (*bucket < ti->n_buckets) { |
|---|
| 296 | 537 | i = 0; |
|---|
| 297 | | - head = flex_array_get(ti->buckets, *bucket); |
|---|
| 538 | + head = &ti->buckets[*bucket]; |
|---|
| 298 | 539 | hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { |
|---|
| 299 | 540 | if (i < *last) { |
|---|
| 300 | 541 | i++; |
|---|
| .. | .. |
|---|
| 313 | 554 | static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) |
|---|
| 314 | 555 | { |
|---|
| 315 | 556 | hash = jhash_1word(hash, ti->hash_seed); |
|---|
| 316 | | - return flex_array_get(ti->buckets, |
|---|
| 317 | | - (hash & (ti->n_buckets - 1))); |
|---|
| 557 | + return &ti->buckets[hash & (ti->n_buckets - 1)]; |
|---|
| 318 | 558 | } |
|---|
| 319 | 559 | |
|---|
| 320 | 560 | static void table_instance_insert(struct table_instance *ti, |
|---|
| .. | .. |
|---|
| 347 | 587 | /* Insert in new table. */ |
|---|
| 348 | 588 | for (i = 0; i < old->n_buckets; i++) { |
|---|
| 349 | 589 | struct sw_flow *flow; |
|---|
| 350 | | - struct hlist_head *head; |
|---|
| 351 | | - |
|---|
| 352 | | - head = flex_array_get(old->buckets, i); |
|---|
| 590 | + struct hlist_head *head = &old->buckets[i]; |
|---|
| 353 | 591 | |
|---|
| 354 | 592 | if (ufid) |
|---|
| 355 | | - hlist_for_each_entry(flow, head, |
|---|
| 356 | | - ufid_table.node[old_ver]) |
|---|
| 593 | + hlist_for_each_entry_rcu(flow, head, |
|---|
| 594 | + ufid_table.node[old_ver], |
|---|
| 595 | + lockdep_ovsl_is_held()) |
|---|
| 357 | 596 | ufid_table_instance_insert(new, flow); |
|---|
| 358 | 597 | else |
|---|
| 359 | | - hlist_for_each_entry(flow, head, |
|---|
| 360 | | - flow_table.node[old_ver]) |
|---|
| 598 | + hlist_for_each_entry_rcu(flow, head, |
|---|
| 599 | + flow_table.node[old_ver], |
|---|
| 600 | + lockdep_ovsl_is_held()) |
|---|
| 361 | 601 | table_instance_insert(new, flow); |
|---|
| 362 | 602 | } |
|---|
| 363 | | - |
|---|
| 364 | | - old->keep_flows = true; |
|---|
| 365 | 603 | } |
|---|
| 366 | 604 | |
|---|
| 367 | 605 | static struct table_instance *table_instance_rehash(struct table_instance *ti, |
|---|
| .. | .. |
|---|
| 396 | 634 | rcu_assign_pointer(flow_table->ti, new_ti); |
|---|
| 397 | 635 | rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); |
|---|
| 398 | 636 | flow_table->last_rehash = jiffies; |
|---|
| 399 | | - flow_table->count = 0; |
|---|
| 400 | | - flow_table->ufid_count = 0; |
|---|
| 401 | 637 | |
|---|
| 402 | | - table_instance_destroy(old_ti, old_ufid_ti, true); |
|---|
| 638 | + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti); |
|---|
| 639 | + table_instance_destroy(old_ti, old_ufid_ti); |
|---|
| 403 | 640 | return 0; |
|---|
| 404 | 641 | |
|---|
| 405 | 642 | err_free_ti: |
|---|
| .. | .. |
|---|
| 410 | 647 | static u32 flow_hash(const struct sw_flow_key *key, |
|---|
| 411 | 648 | const struct sw_flow_key_range *range) |
|---|
| 412 | 649 | { |
|---|
| 413 | | - int key_start = range->start; |
|---|
| 414 | | - int key_end = range->end; |
|---|
| 415 | | - const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); |
|---|
| 416 | | - int hash_u32s = (key_end - key_start) >> 2; |
|---|
| 650 | + const u32 *hash_key = (const u32 *)((const u8 *)key + range->start); |
|---|
| 417 | 651 | |
|---|
| 418 | 652 | /* Make sure number of hash bytes are multiple of u32. */ |
|---|
| 419 | | - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); |
|---|
| 653 | + int hash_u32s = range_n_bytes(range) >> 2; |
|---|
| 420 | 654 | |
|---|
| 421 | 655 | return jhash2(hash_key, hash_u32s, 0); |
|---|
| 422 | 656 | } |
|---|
| .. | .. |
|---|
| 427 | 661 | return 0; |
|---|
| 428 | 662 | else |
|---|
| 429 | 663 | return rounddown(offsetof(struct sw_flow_key, phy), |
|---|
| 430 | | - sizeof(long)); |
|---|
| 664 | + sizeof(long)); |
|---|
| 431 | 665 | } |
|---|
| 432 | 666 | |
|---|
| 433 | 667 | static bool cmp_key(const struct sw_flow_key *key1, |
|---|
| .. | .. |
|---|
| 439 | 673 | long diffs = 0; |
|---|
| 440 | 674 | int i; |
|---|
| 441 | 675 | |
|---|
| 442 | | - for (i = key_start; i < key_end; i += sizeof(long)) |
|---|
| 676 | + for (i = key_start; i < key_end; i += sizeof(long)) |
|---|
| 443 | 677 | diffs |= *cp1++ ^ *cp2++; |
|---|
| 444 | 678 | |
|---|
| 445 | 679 | return diffs == 0; |
|---|
| .. | .. |
|---|
| 465 | 699 | |
|---|
| 466 | 700 | static struct sw_flow *masked_flow_lookup(struct table_instance *ti, |
|---|
| 467 | 701 | const struct sw_flow_key *unmasked, |
|---|
| 468 | | - const struct sw_flow_mask *mask) |
|---|
| 702 | + const struct sw_flow_mask *mask, |
|---|
| 703 | + u32 *n_mask_hit) |
|---|
| 469 | 704 | { |
|---|
| 470 | 705 | struct sw_flow *flow; |
|---|
| 471 | 706 | struct hlist_head *head; |
|---|
| .. | .. |
|---|
| 475 | 710 | ovs_flow_mask_key(&masked_key, unmasked, false, mask); |
|---|
| 476 | 711 | hash = flow_hash(&masked_key, &mask->range); |
|---|
| 477 | 712 | head = find_bucket(ti, hash); |
|---|
| 478 | | - hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { |
|---|
| 713 | + (*n_mask_hit)++; |
|---|
| 714 | + |
|---|
| 715 | + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver], |
|---|
| 716 | + lockdep_ovsl_is_held()) { |
|---|
| 479 | 717 | if (flow->mask == mask && flow->flow_table.hash == hash && |
|---|
| 480 | 718 | flow_cmp_masked_key(flow, &masked_key, &mask->range)) |
|---|
| 481 | 719 | return flow; |
|---|
| .. | .. |
|---|
| 483 | 721 | return NULL; |
|---|
| 484 | 722 | } |
|---|
| 485 | 723 | |
|---|
| 486 | | -struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, |
|---|
| 487 | | - const struct sw_flow_key *key, |
|---|
| 488 | | - u32 *n_mask_hit) |
|---|
| 724 | +/* Flow lookup does a full lookup on the flow table. It starts with |
|---|
| 725 | + * the mask at the index passed in *index. |
|---|
| 726 | + * This function MUST be called with BH disabled due to the use |
|---|
| 727 | + * of CPU specific variables. |
|---|
| 728 | + */ |
|---|
| 729 | +static struct sw_flow *flow_lookup(struct flow_table *tbl, |
|---|
| 730 | + struct table_instance *ti, |
|---|
| 731 | + struct mask_array *ma, |
|---|
| 732 | + const struct sw_flow_key *key, |
|---|
| 733 | + u32 *n_mask_hit, |
|---|
| 734 | + u32 *n_cache_hit, |
|---|
| 735 | + u32 *index) |
|---|
| 489 | 736 | { |
|---|
| 490 | | - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
|---|
| 491 | | - struct sw_flow_mask *mask; |
|---|
| 737 | + struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats); |
|---|
| 492 | 738 | struct sw_flow *flow; |
|---|
| 739 | + struct sw_flow_mask *mask; |
|---|
| 740 | + int i; |
|---|
| 741 | + |
|---|
| 742 | + if (likely(*index < ma->max)) { |
|---|
| 743 | + mask = rcu_dereference_ovsl(ma->masks[*index]); |
|---|
| 744 | + if (mask) { |
|---|
| 745 | + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); |
|---|
| 746 | + if (flow) { |
|---|
| 747 | + u64_stats_update_begin(&stats->syncp); |
|---|
| 748 | + stats->usage_cntrs[*index]++; |
|---|
| 749 | + u64_stats_update_end(&stats->syncp); |
|---|
| 750 | + (*n_cache_hit)++; |
|---|
| 751 | + return flow; |
|---|
| 752 | + } |
|---|
| 753 | + } |
|---|
| 754 | + } |
|---|
| 755 | + |
|---|
| 756 | + for (i = 0; i < ma->max; i++) { |
|---|
| 757 | + |
|---|
| 758 | + if (i == *index) |
|---|
| 759 | + continue; |
|---|
| 760 | + |
|---|
| 761 | + mask = rcu_dereference_ovsl(ma->masks[i]); |
|---|
| 762 | + if (unlikely(!mask)) |
|---|
| 763 | + break; |
|---|
| 764 | + |
|---|
| 765 | + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); |
|---|
| 766 | + if (flow) { /* Found */ |
|---|
| 767 | + *index = i; |
|---|
| 768 | + u64_stats_update_begin(&stats->syncp); |
|---|
| 769 | + stats->usage_cntrs[*index]++; |
|---|
| 770 | + u64_stats_update_end(&stats->syncp); |
|---|
| 771 | + return flow; |
|---|
| 772 | + } |
|---|
| 773 | + } |
|---|
| 774 | + |
|---|
| 775 | + return NULL; |
|---|
| 776 | +} |
|---|
| 777 | + |
|---|
| 778 | +/* |
|---|
| 779 | + * mask_cache maps a flow to a probable mask. This cache is not tightly |
|---|
| 780 | + * coupled to the mask list, so updates to the mask list can leave |
|---|
| 781 | + * inconsistent entries in the mask cache. |
|---|
| 782 | + * This is a per-CPU cache divided into MC_HASH_SEGS segments. |
|---|
| 783 | + * In case of a hash collision the entry is hashed in the next segment. |
|---|
| 784 | + */ |
|---|
| 785 | +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, |
|---|
| 786 | + const struct sw_flow_key *key, |
|---|
| 787 | + u32 skb_hash, |
|---|
| 788 | + u32 *n_mask_hit, |
|---|
| 789 | + u32 *n_cache_hit) |
|---|
| 790 | +{ |
|---|
| 791 | + struct mask_cache *mc = rcu_dereference(tbl->mask_cache); |
|---|
| 792 | + struct mask_array *ma = rcu_dereference(tbl->mask_array); |
|---|
| 793 | + struct table_instance *ti = rcu_dereference(tbl->ti); |
|---|
| 794 | + struct mask_cache_entry *entries, *ce; |
|---|
| 795 | + struct sw_flow *flow; |
|---|
| 796 | + u32 hash; |
|---|
| 797 | + int seg; |
|---|
| 493 | 798 | |
|---|
| 494 | 799 | *n_mask_hit = 0; |
|---|
| 495 | | - list_for_each_entry_rcu(mask, &tbl->mask_list, list) { |
|---|
| 496 | | - (*n_mask_hit)++; |
|---|
| 497 | | - flow = masked_flow_lookup(ti, key, mask); |
|---|
| 498 | | - if (flow) /* Found */ |
|---|
| 499 | | - return flow; |
|---|
| 800 | + *n_cache_hit = 0; |
|---|
| 801 | + if (unlikely(!skb_hash || mc->cache_size == 0)) { |
|---|
| 802 | + u32 mask_index = 0; |
|---|
| 803 | + u32 cache = 0; |
|---|
| 804 | + |
|---|
| 805 | + return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache, |
|---|
| 806 | + &mask_index); |
|---|
| 500 | 807 | } |
|---|
| 501 | | - return NULL; |
|---|
| 808 | + |
|---|
| 809 | + /* Pre- and post-recirculation flows usually have the same skb_hash |
|---|
| 810 | + * value. To avoid hash collisions, rehash the 'skb_hash' with |
|---|
| 811 | + * 'recirc_id'. */ |
|---|
| 812 | + if (key->recirc_id) |
|---|
| 813 | + skb_hash = jhash_1word(skb_hash, key->recirc_id); |
|---|
| 814 | + |
|---|
| 815 | + ce = NULL; |
|---|
| 816 | + hash = skb_hash; |
|---|
| 817 | + entries = this_cpu_ptr(mc->mask_cache); |
|---|
| 818 | + |
|---|
| 819 | + /* Find the cache entry 'ce' to operate on. */ |
|---|
| 820 | + for (seg = 0; seg < MC_HASH_SEGS; seg++) { |
|---|
| 821 | + int index = hash & (mc->cache_size - 1); |
|---|
| 822 | + struct mask_cache_entry *e; |
|---|
| 823 | + |
|---|
| 824 | + e = &entries[index]; |
|---|
| 825 | + if (e->skb_hash == skb_hash) { |
|---|
| 826 | + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, |
|---|
| 827 | + n_cache_hit, &e->mask_index); |
|---|
| 828 | + if (!flow) |
|---|
| 829 | + e->skb_hash = 0; |
|---|
| 830 | + return flow; |
|---|
| 831 | + } |
|---|
| 832 | + |
|---|
| 833 | + if (!ce || e->skb_hash < ce->skb_hash) |
|---|
| 834 | + ce = e; /* A better replacement cache candidate. */ |
|---|
| 835 | + |
|---|
| 836 | + hash >>= MC_HASH_SHIFT; |
|---|
| 837 | + } |
|---|
| 838 | + |
|---|
| 839 | + /* Cache miss, do full lookup. */ |
|---|
| 840 | + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit, |
|---|
| 841 | + &ce->mask_index); |
|---|
| 842 | + if (flow) |
|---|
| 843 | + ce->skb_hash = skb_hash; |
|---|
| 844 | + |
|---|
| 845 | + *n_cache_hit = 0; |
|---|
| 846 | + return flow; |
|---|
| 502 | 847 | } |
|---|
| 503 | 848 | |
|---|
| 504 | 849 | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, |
|---|
| 505 | 850 | const struct sw_flow_key *key) |
|---|
| 506 | 851 | { |
|---|
| 852 | + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
|---|
| 853 | + struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); |
|---|
| 507 | 854 | u32 __always_unused n_mask_hit; |
|---|
| 855 | + u32 __always_unused n_cache_hit; |
|---|
| 856 | + struct sw_flow *flow; |
|---|
| 857 | + u32 index = 0; |
|---|
| 508 | 858 | |
|---|
| 509 | | - return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); |
|---|
| 859 | + /* This function gets called through the netlink interface and therefore |
|---|
| 860 | + * is preemptible. However, flow_lookup() needs to be called |
|---|
| 861 | + * with BH disabled due to CPU specific variables. |
|---|
| 862 | + */ |
|---|
| 863 | + local_bh_disable(); |
|---|
| 864 | + flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); |
|---|
| 865 | + local_bh_enable(); |
|---|
| 866 | + return flow; |
|---|
| 510 | 867 | } |
|---|
| 511 | 868 | |
|---|
| 512 | 869 | struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, |
|---|
| 513 | 870 | const struct sw_flow_match *match) |
|---|
| 514 | 871 | { |
|---|
| 515 | | - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
|---|
| 516 | | - struct sw_flow_mask *mask; |
|---|
| 517 | | - struct sw_flow *flow; |
|---|
| 872 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array); |
|---|
| 873 | + int i; |
|---|
| 518 | 874 | |
|---|
| 519 | 875 | /* Always called under ovs-mutex. */ |
|---|
| 520 | | - list_for_each_entry(mask, &tbl->mask_list, list) { |
|---|
| 521 | | - flow = masked_flow_lookup(ti, match->key, mask); |
|---|
| 876 | + for (i = 0; i < ma->max; i++) { |
|---|
| 877 | + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
|---|
| 878 | + u32 __always_unused n_mask_hit; |
|---|
| 879 | + struct sw_flow_mask *mask; |
|---|
| 880 | + struct sw_flow *flow; |
|---|
| 881 | + |
|---|
| 882 | + mask = ovsl_dereference(ma->masks[i]); |
|---|
| 883 | + if (!mask) |
|---|
| 884 | + continue; |
|---|
| 885 | + |
|---|
| 886 | + flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit); |
|---|
| 522 | 887 | if (flow && ovs_identifier_is_key(&flow->id) && |
|---|
| 523 | | - ovs_flow_cmp_unmasked_key(flow, match)) |
|---|
| 888 | + ovs_flow_cmp_unmasked_key(flow, match)) { |
|---|
| 524 | 889 | return flow; |
|---|
| 890 | + } |
|---|
| 525 | 891 | } |
|---|
| 892 | + |
|---|
| 526 | 893 | return NULL; |
|---|
| 527 | 894 | } |
|---|
| 528 | 895 | |
|---|
| .. | .. |
|---|
| 540 | 907 | return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); |
|---|
| 541 | 908 | } |
|---|
| 542 | 909 | |
|---|
| 543 | | -bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) |
|---|
| 910 | +bool ovs_flow_cmp(const struct sw_flow *flow, |
|---|
| 911 | + const struct sw_flow_match *match) |
|---|
| 544 | 912 | { |
|---|
| 545 | 913 | if (ovs_identifier_is_ufid(&flow->id)) |
|---|
| 546 | 914 | return flow_cmp_masked_key(flow, match->key, &match->range); |
|---|
| .. | .. |
|---|
| 558 | 926 | |
|---|
| 559 | 927 | hash = ufid_hash(ufid); |
|---|
| 560 | 928 | head = find_bucket(ti, hash); |
|---|
| 561 | | - hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { |
|---|
| 929 | + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver], |
|---|
| 930 | + lockdep_ovsl_is_held()) { |
|---|
| 562 | 931 | if (flow->ufid_table.hash == hash && |
|---|
| 563 | 932 | ovs_flow_cmp_ufid(flow, ufid)) |
|---|
| 564 | 933 | return flow; |
|---|
| .. | .. |
|---|
| 568 | 937 | |
|---|
| 569 | 938 | int ovs_flow_tbl_num_masks(const struct flow_table *table) |
|---|
| 570 | 939 | { |
|---|
| 571 | | - struct sw_flow_mask *mask; |
|---|
| 572 | | - int num = 0; |
|---|
| 940 | + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); |
|---|
| 941 | + return READ_ONCE(ma->count); |
|---|
| 942 | +} |
|---|
| 573 | 943 | |
|---|
| 574 | | - list_for_each_entry(mask, &table->mask_list, list) |
|---|
| 575 | | - num++; |
|---|
| 944 | +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table) |
|---|
| 945 | +{ |
|---|
| 946 | + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); |
|---|
| 576 | 947 | |
|---|
| 577 | | - return num; |
|---|
| 948 | + return READ_ONCE(mc->cache_size); |
|---|
| 578 | 949 | } |
|---|
| 579 | 950 | |
|---|
| 580 | 951 | static struct table_instance *table_instance_expand(struct table_instance *ti, |
|---|
| 581 | 952 | bool ufid) |
|---|
| 582 | 953 | { |
|---|
| 583 | 954 | return table_instance_rehash(ti, ti->n_buckets * 2, ufid); |
|---|
| 584 | | -} |
|---|
| 585 | | - |
|---|
| 586 | | -/* Remove 'mask' from the mask list, if it is not needed any more. */ |
|---|
| 587 | | -static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) |
|---|
| 588 | | -{ |
|---|
| 589 | | - if (mask) { |
|---|
| 590 | | - /* ovs-lock is required to protect mask-refcount and |
|---|
| 591 | | - * mask list. |
|---|
| 592 | | - */ |
|---|
| 593 | | - ASSERT_OVSL(); |
|---|
| 594 | | - BUG_ON(!mask->ref_count); |
|---|
| 595 | | - mask->ref_count--; |
|---|
| 596 | | - |
|---|
| 597 | | - if (!mask->ref_count) { |
|---|
| 598 | | - list_del_rcu(&mask->list); |
|---|
| 599 | | - kfree_rcu(mask, rcu); |
|---|
| 600 | | - } |
|---|
| 601 | | - } |
|---|
| 602 | 955 | } |
|---|
| 603 | 956 | |
|---|
| 604 | 957 | /* Must be called with OVS mutex held. */ |
|---|
| .. | .. |
|---|
| 608 | 961 | struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); |
|---|
| 609 | 962 | |
|---|
| 610 | 963 | BUG_ON(table->count == 0); |
|---|
| 611 | | - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); |
|---|
| 612 | | - table->count--; |
|---|
| 613 | | - if (ovs_identifier_is_ufid(&flow->id)) { |
|---|
| 614 | | - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); |
|---|
| 615 | | - table->ufid_count--; |
|---|
| 616 | | - } |
|---|
| 617 | | - |
|---|
| 618 | | - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be |
|---|
| 619 | | - * accessible as long as the RCU read lock is held. |
|---|
| 620 | | - */ |
|---|
| 621 | | - flow_mask_remove(table, flow->mask); |
|---|
| 964 | + table_instance_flow_free(table, ti, ufid_ti, flow); |
|---|
| 622 | 965 | } |
|---|
| 623 | 966 | |
|---|
| 624 | 967 | static struct sw_flow_mask *mask_alloc(void) |
|---|
| .. | .. |
|---|
| 646 | 989 | static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, |
|---|
| 647 | 990 | const struct sw_flow_mask *mask) |
|---|
| 648 | 991 | { |
|---|
| 649 | | - struct list_head *ml; |
|---|
| 992 | + struct mask_array *ma; |
|---|
| 993 | + int i; |
|---|
| 650 | 994 | |
|---|
| 651 | | - list_for_each(ml, &tbl->mask_list) { |
|---|
| 652 | | - struct sw_flow_mask *m; |
|---|
| 653 | | - m = container_of(ml, struct sw_flow_mask, list); |
|---|
| 654 | | - if (mask_equal(mask, m)) |
|---|
| 655 | | - return m; |
|---|
| 995 | + ma = ovsl_dereference(tbl->mask_array); |
|---|
| 996 | + for (i = 0; i < ma->max; i++) { |
|---|
| 997 | + struct sw_flow_mask *t; |
|---|
| 998 | + t = ovsl_dereference(ma->masks[i]); |
|---|
| 999 | + |
|---|
| 1000 | + if (t && mask_equal(mask, t)) |
|---|
| 1001 | + return t; |
|---|
| 656 | 1002 | } |
|---|
| 657 | 1003 | |
|---|
| 658 | 1004 | return NULL; |
|---|
| .. | .. |
|---|
| 663 | 1009 | const struct sw_flow_mask *new) |
|---|
| 664 | 1010 | { |
|---|
| 665 | 1011 | struct sw_flow_mask *mask; |
|---|
| 1012 | + |
|---|
| 666 | 1013 | mask = flow_mask_find(tbl, new); |
|---|
| 667 | 1014 | if (!mask) { |
|---|
| 668 | 1015 | /* Allocate a new mask if none exists. */ |
|---|
| .. | .. |
|---|
| 671 | 1018 | return -ENOMEM; |
|---|
| 672 | 1019 | mask->key = new->key; |
|---|
| 673 | 1020 | mask->range = new->range; |
|---|
| 674 | | - list_add_rcu(&mask->list, &tbl->mask_list); |
|---|
| 1021 | + |
|---|
| 1022 | + /* Add the mask to the mask array. */ |
|---|
| 1023 | + if (tbl_mask_array_add_mask(tbl, mask)) { |
|---|
| 1024 | + kfree(mask); |
|---|
| 1025 | + return -ENOMEM; |
|---|
| 1026 | + } |
|---|
| 675 | 1027 | } else { |
|---|
| 676 | 1028 | BUG_ON(!mask->ref_count); |
|---|
| 677 | 1029 | mask->ref_count++; |
|---|
| .. | .. |
|---|
| 743 | 1095 | return 0; |
|---|
| 744 | 1096 | } |
|---|
| 745 | 1097 | |
|---|
| 1098 | +static int compare_mask_and_count(const void *a, const void *b) |
|---|
| 1099 | +{ |
|---|
| 1100 | + const struct mask_count *mc_a = a; |
|---|
| 1101 | + const struct mask_count *mc_b = b; |
|---|
| 1102 | + |
|---|
| 1103 | + return (s64)mc_b->counter - (s64)mc_a->counter; |
|---|
| 1104 | +} |
|---|
| 1105 | + |
|---|
| 1106 | +/* Must be called with OVS mutex held. */ |
|---|
| 1107 | +void ovs_flow_masks_rebalance(struct flow_table *table) |
|---|
| 1108 | +{ |
|---|
| 1109 | + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); |
|---|
| 1110 | + struct mask_count *masks_and_count; |
|---|
| 1111 | + struct mask_array *new; |
|---|
| 1112 | + int masks_entries = 0; |
|---|
| 1113 | + int i; |
|---|
| 1114 | + |
|---|
| 1115 | + /* Build array of all current entries with use counters. */ |
|---|
| 1116 | + masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count), |
|---|
| 1117 | + GFP_KERNEL); |
|---|
| 1118 | + if (!masks_and_count) |
|---|
| 1119 | + return; |
|---|
| 1120 | + |
|---|
| 1121 | + for (i = 0; i < ma->max; i++) { |
|---|
| 1122 | + struct sw_flow_mask *mask; |
|---|
| 1123 | + int cpu; |
|---|
| 1124 | + |
|---|
| 1125 | + mask = rcu_dereference_ovsl(ma->masks[i]); |
|---|
| 1126 | + if (unlikely(!mask)) |
|---|
| 1127 | + break; |
|---|
| 1128 | + |
|---|
| 1129 | + masks_and_count[i].index = i; |
|---|
| 1130 | + masks_and_count[i].counter = 0; |
|---|
| 1131 | + |
|---|
| 1132 | + for_each_possible_cpu(cpu) { |
|---|
| 1133 | + struct mask_array_stats *stats; |
|---|
| 1134 | + unsigned int start; |
|---|
| 1135 | + u64 counter; |
|---|
| 1136 | + |
|---|
| 1137 | + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); |
|---|
| 1138 | + do { |
|---|
| 1139 | + start = u64_stats_fetch_begin_irq(&stats->syncp); |
|---|
| 1140 | + counter = stats->usage_cntrs[i]; |
|---|
| 1141 | + } while (u64_stats_fetch_retry_irq(&stats->syncp, |
|---|
| 1142 | + start)); |
|---|
| 1143 | + |
|---|
| 1144 | + masks_and_count[i].counter += counter; |
|---|
| 1145 | + } |
|---|
| 1146 | + |
|---|
| 1147 | + /* Subtract the zero count value. */ |
|---|
| 1148 | + masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i]; |
|---|
| 1149 | + |
|---|
| 1150 | + /* Rather than calling tbl_mask_array_reset_counters() |
|---|
| 1151 | + * below when no change is needed, do it inline here. |
|---|
| 1152 | + */ |
|---|
| 1153 | + ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter; |
|---|
| 1154 | + } |
|---|
| 1155 | + |
|---|
| 1156 | + if (i == 0) |
|---|
| 1157 | + goto free_mask_entries; |
|---|
| 1158 | + |
|---|
| 1159 | + /* Sort the entries */ |
|---|
| 1160 | + masks_entries = i; |
|---|
| 1161 | + sort(masks_and_count, masks_entries, sizeof(*masks_and_count), |
|---|
| 1162 | + compare_mask_and_count, NULL); |
|---|
| 1163 | + |
|---|
| 1164 | + /* If the order is the same, nothing to do... */ |
|---|
| 1165 | + for (i = 0; i < masks_entries; i++) { |
|---|
| 1166 | + if (i != masks_and_count[i].index) |
|---|
| 1167 | + break; |
|---|
| 1168 | + } |
|---|
| 1169 | + if (i == masks_entries) |
|---|
| 1170 | + goto free_mask_entries; |
|---|
| 1171 | + |
|---|
| 1172 | + /* Rebuild the new list in order of usage. */ |
|---|
| 1173 | + new = tbl_mask_array_alloc(ma->max); |
|---|
| 1174 | + if (!new) |
|---|
| 1175 | + goto free_mask_entries; |
|---|
| 1176 | + |
|---|
| 1177 | + for (i = 0; i < masks_entries; i++) { |
|---|
| 1178 | + int index = masks_and_count[i].index; |
|---|
| 1179 | + |
|---|
| 1180 | + if (ovsl_dereference(ma->masks[index])) |
|---|
| 1181 | + new->masks[new->count++] = ma->masks[index]; |
|---|
| 1182 | + } |
|---|
| 1183 | + |
|---|
| 1184 | + rcu_assign_pointer(table->mask_array, new); |
|---|
| 1185 | + call_rcu(&ma->rcu, mask_array_rcu_cb); |
|---|
| 1186 | + |
|---|
| 1187 | +free_mask_entries: |
|---|
| 1188 | + kfree(masks_and_count); |
|---|
| 1189 | +} |
|---|
| 1190 | + |
|---|
| 746 | 1191 | /* Initializes the flow module. |
|---|
| 747 | 1192 | * Returns zero if successful or a negative error code. */ |
|---|
| 748 | 1193 | int ovs_flow_init(void) |
|---|
| .. | .. |
|---|
| 752 | 1197 | |
|---|
| 753 | 1198 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) |
|---|
| 754 | 1199 | + (nr_cpu_ids |
|---|
| 755 | | - * sizeof(struct flow_stats *)), |
|---|
| 1200 | + * sizeof(struct sw_flow_stats *)), |
|---|
| 756 | 1201 | 0, 0, NULL); |
|---|
| 757 | 1202 | if (flow_cache == NULL) |
|---|
| 758 | 1203 | return -ENOMEM; |
|---|
| 759 | 1204 | |
|---|
| 760 | 1205 | flow_stats_cache |
|---|
| 761 | | - = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), |
|---|
| 1206 | + = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats), |
|---|
| 762 | 1207 | 0, SLAB_HWCACHE_ALIGN, NULL); |
|---|
| 763 | 1208 | if (flow_stats_cache == NULL) { |
|---|
| 764 | 1209 | kmem_cache_destroy(flow_cache); |
|---|