.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 | * Copyright (c) 2007-2014 Nicira, Inc.
3 | | - *
4 | | - * This program is free software; you can redistribute it and/or
5 | | - * modify it under the terms of version 2 of the GNU General Public
6 | | - * License as published by the Free Software Foundation.
7 | | - *
8 | | - * This program is distributed in the hope that it will be useful, but
9 | | - * WITHOUT ANY WARRANTY; without even the implied warranty of
10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | | - * General Public License for more details.
12 | | - *
13 | | - * You should have received a copy of the GNU General Public License
14 | | - * along with this program; if not, write to the Free Software
15 | | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 | | - * 02110-1301, USA
17 | 4 | */
18 | 5 |
19 | 6 | #include "flow.h"
.. | ..
42 | 29 | #include <linux/icmp.h>
43 | 30 | #include <linux/icmpv6.h>
44 | 31 | #include <linux/rculist.h>
| 32 | +#include <linux/sort.h>
45 | 33 | #include <net/ip.h>
46 | 34 | #include <net/ipv6.h>
47 | 35 | #include <net/ndisc.h>
48 | 36 |
49 | 37 | #define TBL_MIN_BUCKETS 1024
| 38 | +#define MASK_ARRAY_SIZE_MIN 16
50 | 39 | #define REHASH_INTERVAL (10 * 60 * HZ)
| 40 | +
| 41 | +#define MC_DEFAULT_HASH_ENTRIES 256
| 42 | +#define MC_HASH_SHIFT 8
| 43 | +#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
51 | 44 |
52 | 45 | static struct kmem_cache *flow_cache;
53 | 46 | struct kmem_cache *flow_stats_cache __read_mostly;
.. | ..
79 | 72 | struct sw_flow *ovs_flow_alloc(void)
80 | 73 | {
81 | 74 | struct sw_flow *flow;
82 | | - struct flow_stats *stats;
| 75 | + struct sw_flow_stats *stats;
83 | 76 |
84 | 77 | flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
85 | 78 | if (!flow)
.. | ..
111 | 104 | return table->count;
112 | 105 | }
113 | 106 |
114 | | -static struct flex_array *alloc_buckets(unsigned int n_buckets)
115 | | -{
116 | | - struct flex_array *buckets;
117 | | - int i, err;
118 | | -
119 | | - buckets = flex_array_alloc(sizeof(struct hlist_head),
120 | | - n_buckets, GFP_KERNEL);
121 | | - if (!buckets)
122 | | - return NULL;
123 | | -
124 | | - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
125 | | - if (err) {
126 | | - flex_array_free(buckets);
127 | | - return NULL;
128 | | - }
129 | | -
130 | | - for (i = 0; i < n_buckets; i++)
131 | | - INIT_HLIST_HEAD((struct hlist_head *)
132 | | - flex_array_get(buckets, i));
133 | | -
134 | | - return buckets;
135 | | -}
136 | | -
137 | 107 | static void flow_free(struct sw_flow *flow)
138 | 108 | {
139 | 109 | int cpu;
.. | ..
141 | 111 | if (ovs_identifier_is_key(&flow->id))
142 | 112 | kfree(flow->id.unmasked_key);
143 | 113 | if (flow->sf_acts)
144 | | - ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
| 114 | + ovs_nla_free_flow_actions((struct sw_flow_actions __force *)
| 115 | + flow->sf_acts);
145 | 116 | /* We open code this to make sure cpu 0 is always considered */
146 | | - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
| 117 | + for (cpu = 0; cpu < nr_cpu_ids;
| 118 | + cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
147 | 119 | if (flow->stats[cpu])
148 | 120 | kmem_cache_free(flow_stats_cache,
149 | | - (struct flow_stats __force *)flow->stats[cpu]);
| 121 | + (struct sw_flow_stats __force *)flow->stats[cpu]);
| 122 | + }
| 123 | +
150 | 124 | kmem_cache_free(flow_cache, flow);
151 | 125 | }
152 | 126 |
.. | ..
168 | 142 | flow_free(flow);
169 | 143 | }
170 | 144 |
171 | | -static void free_buckets(struct flex_array *buckets)
172 | | -{
173 | | - flex_array_free(buckets);
174 | | -}
175 | | -
176 | | -
177 | 145 | static void __table_instance_destroy(struct table_instance *ti)
178 | 146 | {
179 | | - free_buckets(ti->buckets);
| 147 | + kvfree(ti->buckets);
180 | 148 | kfree(ti);
181 | 149 | }
182 | 150 |
183 | 151 | static struct table_instance *table_instance_alloc(int new_size)
184 | 152 | {
185 | 153 | struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
| 154 | + int i;
186 | 155 |
187 | 156 | if (!ti)
188 | 157 | return NULL;
189 | 158 |
190 | | - ti->buckets = alloc_buckets(new_size);
191 | | -
| 159 | + ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
| 160 | + GFP_KERNEL);
192 | 161 | if (!ti->buckets) {
193 | 162 | kfree(ti);
194 | 163 | return NULL;
195 | 164 | }
| 165 | +
| 166 | + for (i = 0; i < new_size; i++)
| 167 | + INIT_HLIST_HEAD(&ti->buckets[i]);
| 168 | +
196 | 169 | ti->n_buckets = new_size;
197 | 170 | ti->node_ver = 0;
198 | | - ti->keep_flows = false;
199 | 171 | get_random_bytes(&ti->hash_seed, sizeof(u32));
200 | 172 |
201 | 173 | return ti;
202 | 174 | }
203 | 175 |
| 176 | +static void __mask_array_destroy(struct mask_array *ma)
| 177 | +{
| 178 | + free_percpu(ma->masks_usage_stats);
| 179 | + kfree(ma);
| 180 | +}
| 181 | +
| 182 | +static void mask_array_rcu_cb(struct rcu_head *rcu)
| 183 | +{
| 184 | + struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
| 185 | +
| 186 | + __mask_array_destroy(ma);
| 187 | +}
| 188 | +
| 189 | +static void tbl_mask_array_reset_counters(struct mask_array *ma)
| 190 | +{
| 191 | + int i, cpu;
| 192 | +
| 193 | + /* As the per CPU counters are not atomic we can not go ahead and
| 194 | + * reset them from another CPU. To be able to still have an approximate
| 195 | + * zero based counter we store the value at reset, and subtract it
| 196 | + * later when processing.
| 197 | + */
| 198 | + for (i = 0; i < ma->max; i++) {
| 199 | + ma->masks_usage_zero_cntr[i] = 0;
| 200 | +
| 201 | + for_each_possible_cpu(cpu) {
| 202 | + struct mask_array_stats *stats;
| 203 | + unsigned int start;
| 204 | + u64 counter;
| 205 | +
| 206 | + stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
| 207 | + do {
| 208 | + start = u64_stats_fetch_begin_irq(&stats->syncp);
| 209 | + counter = stats->usage_cntrs[i];
| 210 | + } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
| 211 | +
| 212 | + ma->masks_usage_zero_cntr[i] += counter;
| 213 | + }
| 214 | + }
| 215 | +}
| 216 | +
| 217 | +static struct mask_array *tbl_mask_array_alloc(int size)
| 218 | +{
| 219 | + struct mask_array *new;
| 220 | +
| 221 | + size = max(MASK_ARRAY_SIZE_MIN, size);
| 222 | + new = kzalloc(sizeof(struct mask_array) +
| 223 | + sizeof(struct sw_flow_mask *) * size +
| 224 | + sizeof(u64) * size, GFP_KERNEL);
| 225 | + if (!new)
| 226 | + return NULL;
| 227 | +
| 228 | + new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
| 229 | + sizeof(struct mask_array) +
| 230 | + sizeof(struct sw_flow_mask *) *
| 231 | + size);
| 232 | +
| 233 | + new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
| 234 | + sizeof(u64) * size,
| 235 | + __alignof__(u64));
| 236 | + if (!new->masks_usage_stats) {
| 237 | + kfree(new);
| 238 | + return NULL;
| 239 | + }
| 240 | +
| 241 | + new->count = 0;
| 242 | + new->max = size;
| 243 | +
| 244 | + return new;
| 245 | +}
| 246 | +
| 247 | +static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
| 248 | +{
| 249 | + struct mask_array *old;
| 250 | + struct mask_array *new;
| 251 | +
| 252 | + new = tbl_mask_array_alloc(size);
| 253 | + if (!new)
| 254 | + return -ENOMEM;
| 255 | +
| 256 | + old = ovsl_dereference(tbl->mask_array);
| 257 | + if (old) {
| 258 | + int i;
| 259 | +
| 260 | + for (i = 0; i < old->max; i++) {
| 261 | + if (ovsl_dereference(old->masks[i]))
| 262 | + new->masks[new->count++] = old->masks[i];
| 263 | + }
| 264 | + call_rcu(&old->rcu, mask_array_rcu_cb);
| 265 | + }
| 266 | +
| 267 | + rcu_assign_pointer(tbl->mask_array, new);
| 268 | +
| 269 | + return 0;
| 270 | +}
| 271 | +
| 272 | +static int tbl_mask_array_add_mask(struct flow_table *tbl,
| 273 | + struct sw_flow_mask *new)
| 274 | +{
| 275 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
| 276 | + int err, ma_count = READ_ONCE(ma->count);
| 277 | +
| 278 | + if (ma_count >= ma->max) {
| 279 | + err = tbl_mask_array_realloc(tbl, ma->max +
| 280 | + MASK_ARRAY_SIZE_MIN);
| 281 | + if (err)
| 282 | + return err;
| 283 | +
| 284 | + ma = ovsl_dereference(tbl->mask_array);
| 285 | + } else {
| 286 | + /* On every add or delete we need to reset the counters so
| 287 | + * every new mask gets a fair chance of being prioritized.
| 288 | + */
| 289 | + tbl_mask_array_reset_counters(ma);
| 290 | + }
| 291 | +
| 292 | + BUG_ON(ovsl_dereference(ma->masks[ma_count]));
| 293 | +
| 294 | + rcu_assign_pointer(ma->masks[ma_count], new);
| 295 | + WRITE_ONCE(ma->count, ma_count + 1);
| 296 | +
| 297 | + return 0;
| 298 | +}
| 299 | +
| 300 | +static void tbl_mask_array_del_mask(struct flow_table *tbl,
| 301 | + struct sw_flow_mask *mask)
| 302 | +{
| 303 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array);
| 304 | + int i, ma_count = READ_ONCE(ma->count);
| 305 | +
| 306 | + /* Remove the deleted mask pointers from the array */
| 307 | + for (i = 0; i < ma_count; i++) {
| 308 | + if (mask == ovsl_dereference(ma->masks[i]))
| 309 | + goto found;
| 310 | + }
| 311 | +
| 312 | + BUG();
| 313 | + return;
| 314 | +
| 315 | +found:
| 316 | + WRITE_ONCE(ma->count, ma_count - 1);
| 317 | +
| 318 | + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]);
| 319 | + RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL);
| 320 | +
| 321 | + kfree_rcu(mask, rcu);
| 322 | +
| 323 | + /* Shrink the mask array if necessary. */
| 324 | + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
| 325 | + ma_count <= (ma->max / 3))
| 326 | + tbl_mask_array_realloc(tbl, ma->max / 2);
| 327 | + else
| 328 | + tbl_mask_array_reset_counters(ma);
| 329 | +
| 330 | +}
| 331 | +
| 332 | +/* Remove 'mask' from the mask list, if it is not needed any more. */
| 333 | +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
| 334 | +{
| 335 | + if (mask) {
| 336 | + /* ovs-lock is required to protect mask-refcount and
| 337 | + * mask list.
| 338 | + */
| 339 | + ASSERT_OVSL();
| 340 | + BUG_ON(!mask->ref_count);
| 341 | + mask->ref_count--;
| 342 | +
| 343 | + if (!mask->ref_count)
| 344 | + tbl_mask_array_del_mask(tbl, mask);
| 345 | + }
| 346 | +}
| 347 | +
| 348 | +static void __mask_cache_destroy(struct mask_cache *mc)
| 349 | +{
| 350 | + free_percpu(mc->mask_cache);
| 351 | + kfree(mc);
| 352 | +}
| 353 | +
| 354 | +static void mask_cache_rcu_cb(struct rcu_head *rcu)
| 355 | +{
| 356 | + struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
| 357 | +
| 358 | + __mask_cache_destroy(mc);
| 359 | +}
| 360 | +
| 361 | +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
| 362 | +{
| 363 | + struct mask_cache_entry __percpu *cache = NULL;
| 364 | + struct mask_cache *new;
| 365 | +
| 366 | + /* Only allow size to be 0, or a power of 2, and does not exceed
| 367 | + * percpu allocation size.
| 368 | + */
| 369 | + if ((!is_power_of_2(size) && size != 0) ||
| 370 | + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
| 371 | + return NULL;
| 372 | +
| 373 | + new = kzalloc(sizeof(*new), GFP_KERNEL);
| 374 | + if (!new)
| 375 | + return NULL;
| 376 | +
| 377 | + new->cache_size = size;
| 378 | + if (new->cache_size > 0) {
| 379 | + cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
| 380 | + new->cache_size),
| 381 | + __alignof__(struct mask_cache_entry));
| 382 | + if (!cache) {
| 383 | + kfree(new);
| 384 | + return NULL;
| 385 | + }
| 386 | + }
| 387 | +
| 388 | + new->mask_cache = cache;
| 389 | + return new;
| 390 | +}
| 391 | +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
| 392 | +{
| 393 | + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
| 394 | + struct mask_cache *new;
| 395 | +
| 396 | + if (size == mc->cache_size)
| 397 | + return 0;
| 398 | +
| 399 | + if ((!is_power_of_2(size) && size != 0) ||
| 400 | + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
| 401 | + return -EINVAL;
| 402 | +
| 403 | + new = tbl_mask_cache_alloc(size);
| 404 | + if (!new)
| 405 | + return -ENOMEM;
| 406 | +
| 407 | + rcu_assign_pointer(table->mask_cache, new);
| 408 | + call_rcu(&mc->rcu, mask_cache_rcu_cb);
| 409 | +
| 410 | + return 0;
| 411 | +}
| 412 | +
204 | 413 | int ovs_flow_tbl_init(struct flow_table *table)
205 | 414 | {
206 | 415 | struct table_instance *ti, *ufid_ti;
| 416 | + struct mask_cache *mc;
| 417 | + struct mask_array *ma;
| 418 | +
| 419 | + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
| 420 | + if (!mc)
| 421 | + return -ENOMEM;
| 422 | +
| 423 | + ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
| 424 | + if (!ma)
| 425 | + goto free_mask_cache;
207 | 426 |
208 | 427 | ti = table_instance_alloc(TBL_MIN_BUCKETS);
209 | | -
210 | 428 | if (!ti)
211 | | - return -ENOMEM;
| 429 | + goto free_mask_array;
212 | 430 |
213 | 431 | ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
214 | 432 | if (!ufid_ti)
.. | ..
216 | 434 |
217 | 435 | rcu_assign_pointer(table->ti, ti);
218 | 436 | rcu_assign_pointer(table->ufid_ti, ufid_ti);
219 | | - INIT_LIST_HEAD(&table->mask_list);
| 437 | + rcu_assign_pointer(table->mask_array, ma);
| 438 | + rcu_assign_pointer(table->mask_cache, mc);
220 | 439 | table->last_rehash = jiffies;
221 | 440 | table->count = 0;
222 | 441 | table->ufid_count = 0;
.. | ..
224 | 443 |
225 | 444 | free_ti:
226 | 445 | __table_instance_destroy(ti);
| 446 | +free_mask_array:
| 447 | + __mask_array_destroy(ma);
| 448 | +free_mask_cache:
| 449 | + __mask_cache_destroy(mc);
227 | 450 | return -ENOMEM;
228 | 451 | }
229 | 452 |
230 | 453 | static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
231 | 454 | {
232 | | - struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
| 455 | + struct table_instance *ti;
233 | 456 |
| 457 | + ti = container_of(rcu, struct table_instance, rcu);
234 | 458 | __table_instance_destroy(ti);
235 | 459 | }
236 | 460 |
237 | | -static void table_instance_destroy(struct table_instance *ti,
238 | | - struct table_instance *ufid_ti,
239 | | - bool deferred)
| 461 | +static void table_instance_flow_free(struct flow_table *table,
| 462 | + struct table_instance *ti,
| 463 | + struct table_instance *ufid_ti,
| 464 | + struct sw_flow *flow)
| 465 | +{
| 466 | + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
| 467 | + table->count--;
| 468 | +
| 469 | + if (ovs_identifier_is_ufid(&flow->id)) {
| 470 | + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
| 471 | + table->ufid_count--;
| 472 | + }
| 473 | +
| 474 | + flow_mask_remove(table, flow->mask);
| 475 | +}
| 476 | +
| 477 | +/* Must be called with OVS mutex held. */
| 478 | +void table_instance_flow_flush(struct flow_table *table,
| 479 | + struct table_instance *ti,
| 480 | + struct table_instance *ufid_ti)
240 | 481 | {
241 | 482 | int i;
242 | 483 |
243 | | - if (!ti)
244 | | - return;
245 | | -
246 | | - BUG_ON(!ufid_ti);
247 | | - if (ti->keep_flows)
248 | | - goto skip_flows;
249 | | -
250 | 484 | for (i = 0; i < ti->n_buckets; i++) {
251 | | - struct sw_flow *flow;
252 | | - struct hlist_head *head = flex_array_get(ti->buckets, i);
| 485 | + struct hlist_head *head = &ti->buckets[i];
253 | 486 | struct hlist_node *n;
254 | | - int ver = ti->node_ver;
255 | | - int ufid_ver = ufid_ti->node_ver;
| 487 | + struct sw_flow *flow;
256 | 488 |
257 | | - hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
258 | | - hlist_del_rcu(&flow->flow_table.node[ver]);
259 | | - if (ovs_identifier_is_ufid(&flow->id))
260 | | - hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
261 | | - ovs_flow_free(flow, deferred);
| 489 | + hlist_for_each_entry_safe(flow, n, head,
| 490 | + flow_table.node[ti->node_ver]) {
| 491 | +
| 492 | + table_instance_flow_free(table, ti, ufid_ti,
| 493 | + flow);
| 494 | + ovs_flow_free(flow, true);
262 | 495 | }
263 | 496 | }
264 | 497 |
265 | | -skip_flows:
266 | | - if (deferred) {
267 | | - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
268 | | - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
269 | | - } else {
270 | | - __table_instance_destroy(ti);
271 | | - __table_instance_destroy(ufid_ti);
| 498 | + if (WARN_ON(table->count != 0 ||
| 499 | + table->ufid_count != 0)) {
| 500 | + table->count = 0;
| 501 | + table->ufid_count = 0;
272 | 502 | }
| 503 | +}
| 504 | +
| 505 | +static void table_instance_destroy(struct table_instance *ti,
| 506 | + struct table_instance *ufid_ti)
| 507 | +{
| 508 | + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
| 509 | + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
273 | 510 | }
274 | 511 |
275 | 512 | /* No need for locking this function is called from RCU callback or
.. | ..
279 | 516 | {
280 | 517 | struct table_instance *ti = rcu_dereference_raw(table->ti);
281 | 518 | struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
| 519 | + struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
| 520 | + struct mask_array *ma = rcu_dereference_raw(table->mask_array);
282 | 521 |
283 | | - table_instance_destroy(ti, ufid_ti, false);
| 522 | + call_rcu(&mc->rcu, mask_cache_rcu_cb);
| 523 | + call_rcu(&ma->rcu, mask_array_rcu_cb);
| 524 | + table_instance_destroy(ti, ufid_ti);
284 | 525 | }
285 | 526 |
286 | 527 | struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
.. | ..
294 | 535 | ver = ti->node_ver;
295 | 536 | while (*bucket < ti->n_buckets) {
296 | 537 | i = 0;
297 | | - head = flex_array_get(ti->buckets, *bucket);
| 538 | + head = &ti->buckets[*bucket];
298 | 539 | hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
299 | 540 | if (i < *last) {
300 | 541 | i++;
.. | ..
313 | 554 | static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
314 | 555 | {
315 | 556 | hash = jhash_1word(hash, ti->hash_seed);
316 | | - return flex_array_get(ti->buckets,
317 | | - (hash & (ti->n_buckets - 1)));
| 557 | + return &ti->buckets[hash & (ti->n_buckets - 1)];
318 | 558 | }
319 | 559 |
320 | 560 | static void table_instance_insert(struct table_instance *ti,
.. | ..
347 | 587 | /* Insert in new table. */
348 | 588 | for (i = 0; i < old->n_buckets; i++) {
349 | 589 | struct sw_flow *flow;
350 | | - struct hlist_head *head;
351 | | -
352 | | - head = flex_array_get(old->buckets, i);
| 590 | + struct hlist_head *head = &old->buckets[i];
353 | 591 |
354 | 592 | if (ufid)
355 | | - hlist_for_each_entry(flow, head,
356 | | - ufid_table.node[old_ver])
| 593 | + hlist_for_each_entry_rcu(flow, head,
| 594 | + ufid_table.node[old_ver],
| 595 | + lockdep_ovsl_is_held())
357 | 596 | ufid_table_instance_insert(new, flow);
358 | 597 | else
359 | | - hlist_for_each_entry(flow, head,
360 | | - flow_table.node[old_ver])
| 598 | + hlist_for_each_entry_rcu(flow, head,
| 599 | + flow_table.node[old_ver],
| 600 | + lockdep_ovsl_is_held())
361 | 601 | table_instance_insert(new, flow);
362 | 602 | }
363 | | -
364 | | - old->keep_flows = true;
365 | 603 | }
366 | 604 |
367 | 605 | static struct table_instance *table_instance_rehash(struct table_instance *ti,
.. | ..
396 | 634 | rcu_assign_pointer(flow_table->ti, new_ti);
397 | 635 | rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
398 | 636 | flow_table->last_rehash = jiffies;
399 | | - flow_table->count = 0;
400 | | - flow_table->ufid_count = 0;
401 | 637 |
402 | | - table_instance_destroy(old_ti, old_ufid_ti, true);
| 638 | + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
| 639 | + table_instance_destroy(old_ti, old_ufid_ti);
403 | 640 | return 0;
404 | 641 |
405 | 642 | err_free_ti:
.. | ..
410 | 647 | static u32 flow_hash(const struct sw_flow_key *key,
411 | 648 | const struct sw_flow_key_range *range)
412 | 649 | {
413 | | - int key_start = range->start;
414 | | - int key_end = range->end;
415 | | - const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
416 | | - int hash_u32s = (key_end - key_start) >> 2;
| 650 | + const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
417 | 651 |
418 | 652 | /* Make sure number of hash bytes are multiple of u32. */
419 | | - BUILD_BUG_ON(sizeof(long) % sizeof(u32));
| 653 | + int hash_u32s = range_n_bytes(range) >> 2;
420 | 654 |
421 | 655 | return jhash2(hash_key, hash_u32s, 0);
422 | 656 | }
.. | ..
427 | 661 | return 0;
428 | 662 | else
429 | 663 | return rounddown(offsetof(struct sw_flow_key, phy),
430 | | - sizeof(long));
| 664 | + sizeof(long));
431 | 665 | }
432 | 666 |
433 | 667 | static bool cmp_key(const struct sw_flow_key *key1,
.. | ..
439 | 673 | long diffs = 0;
440 | 674 | int i;
441 | 675 |
442 | | - for (i = key_start; i < key_end; i += sizeof(long))
| 676 | + for (i = key_start; i < key_end; i += sizeof(long))
443 | 677 | diffs |= *cp1++ ^ *cp2++;
444 | 678 |
445 | 679 | return diffs == 0;
.. | ..
465 | 699 |
466 | 700 | static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
467 | 701 | const struct sw_flow_key *unmasked,
468 | | - const struct sw_flow_mask *mask)
| 702 | + const struct sw_flow_mask *mask,
| 703 | + u32 *n_mask_hit)
469 | 704 | {
470 | 705 | struct sw_flow *flow;
471 | 706 | struct hlist_head *head;
.. | ..
475 | 710 | ovs_flow_mask_key(&masked_key, unmasked, false, mask);
476 | 711 | hash = flow_hash(&masked_key, &mask->range);
477 | 712 | head = find_bucket(ti, hash);
478 | | - hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
| 713 | + (*n_mask_hit)++;
| 714 | +
| 715 | + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
| 716 | + lockdep_ovsl_is_held()) {
479 | 717 | if (flow->mask == mask && flow->flow_table.hash == hash &&
480 | 718 | flow_cmp_masked_key(flow, &masked_key, &mask->range))
481 | 719 | return flow;
.. | ..
483 | 721 | return NULL;
484 | 722 | }
485 | 723 |
486 | | -struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
487 | | - const struct sw_flow_key *key,
488 | | - u32 *n_mask_hit)
| 724 | +/* Flow lookup does full lookup on flow table. It starts with
| 725 | + * mask from index passed in *index.
| 726 | + * This function MUST be called with BH disabled due to the use
| 727 | + * of CPU specific variables.
| 728 | + */
| 729 | +static struct sw_flow *flow_lookup(struct flow_table *tbl,
| 730 | + struct table_instance *ti,
| 731 | + struct mask_array *ma,
| 732 | + const struct sw_flow_key *key,
| 733 | + u32 *n_mask_hit,
| 734 | + u32 *n_cache_hit,
| 735 | + u32 *index)
489 | 736 | {
490 | | - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
491 | | - struct sw_flow_mask *mask;
| 737 | + struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
492 | 738 | struct sw_flow *flow;
| 739 | + struct sw_flow_mask *mask;
| 740 | + int i;
| 741 | +
| 742 | + if (likely(*index < ma->max)) {
| 743 | + mask = rcu_dereference_ovsl(ma->masks[*index]);
| 744 | + if (mask) {
| 745 | + flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
| 746 | + if (flow) {
| 747 | + u64_stats_update_begin(&stats->syncp);
| 748 | + stats->usage_cntrs[*index]++;
| 749 | + u64_stats_update_end(&stats->syncp);
| 750 | + (*n_cache_hit)++;
| 751 | + return flow;
| 752 | + }
| 753 | + }
| 754 | + }
| 755 | +
| 756 | + for (i = 0; i < ma->max; i++) {
| 757 | +
| 758 | + if (i == *index)
| 759 | + continue;
| 760 | +
| 761 | + mask = rcu_dereference_ovsl(ma->masks[i]);
| 762 | + if (unlikely(!mask))
| 763 | + break;
| 764 | +
| 765 | + flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
| 766 | + if (flow) { /* Found */
| 767 | + *index = i;
| 768 | + u64_stats_update_begin(&stats->syncp);
| 769 | + stats->usage_cntrs[*index]++;
| 770 | + u64_stats_update_end(&stats->syncp);
| 771 | + return flow;
| 772 | + }
| 773 | + }
| 774 | +
| 775 | + return NULL;
| 776 | +}
| 777 | +
| 778 | +/*
| 779 | + * mask_cache maps flow to probable mask. This cache is not tightly
| 780 | + * coupled cache, It means updates to mask list can result in inconsistent
| 781 | + * cache entry in mask cache.
| 782 | + * This is per cpu cache and is divided in MC_HASH_SEGS segments.
| 783 | + * In case of a hash collision the entry is hashed in next segment.
| 784 | + * */
| 785 | +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
| 786 | + const struct sw_flow_key *key,
| 787 | + u32 skb_hash,
| 788 | + u32 *n_mask_hit,
| 789 | + u32 *n_cache_hit)
| 790 | +{
| 791 | + struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
| 792 | + struct mask_array *ma = rcu_dereference(tbl->mask_array);
| 793 | + struct table_instance *ti = rcu_dereference(tbl->ti);
| 794 | + struct mask_cache_entry *entries, *ce;
| 795 | + struct sw_flow *flow;
| 796 | + u32 hash;
| 797 | + int seg;
493 | 798 | |
---|
494 | 799 | *n_mask_hit = 0; |
---|
495 | | - list_for_each_entry_rcu(mask, &tbl->mask_list, list) { |
---|
496 | | - (*n_mask_hit)++; |
---|
497 | | - flow = masked_flow_lookup(ti, key, mask); |
---|
498 | | - if (flow) /* Found */ |
---|
499 | | - return flow; |
---|
| 800 | + *n_cache_hit = 0; |
---|
| 801 | + if (unlikely(!skb_hash || mc->cache_size == 0)) { |
---|
| 802 | + u32 mask_index = 0; |
---|
| 803 | + u32 cache = 0; |
---|
| 804 | + |
---|
| 805 | + return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache, |
---|
| 806 | + &mask_index); |
---|
500 | 807 | } |
---|
501 | | - return NULL; |
---|
| 808 | + |
---|
| 809 | + /* Pre and post recirulation flows usually have the same skb_hash |
---|
| 810 | + * value. To avoid hash collisions, rehash the 'skb_hash' with |
---|
| 811 | + * 'recirc_id'. */ |
---|
| 812 | + if (key->recirc_id) |
---|
| 813 | + skb_hash = jhash_1word(skb_hash, key->recirc_id); |
---|
| 814 | + |
---|
| 815 | + ce = NULL; |
---|
| 816 | + hash = skb_hash; |
---|
| 817 | + entries = this_cpu_ptr(mc->mask_cache); |
---|
| 818 | + |
---|
| 819 | + /* Find the cache entry 'ce' to operate on. */ |
---|
| 820 | + for (seg = 0; seg < MC_HASH_SEGS; seg++) { |
---|
| 821 | + int index = hash & (mc->cache_size - 1); |
---|
| 822 | + struct mask_cache_entry *e; |
---|
| 823 | + |
---|
| 824 | + e = &entries[index]; |
---|
| 825 | + if (e->skb_hash == skb_hash) { |
---|
| 826 | + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, |
---|
| 827 | + n_cache_hit, &e->mask_index); |
---|
| 828 | + if (!flow) |
---|
| 829 | + e->skb_hash = 0; |
---|
| 830 | + return flow; |
---|
| 831 | + } |
---|
| 832 | + |
---|
| 833 | + if (!ce || e->skb_hash < ce->skb_hash) |
---|
| 834 | + ce = e; /* A better replacement cache candidate. */ |
---|
| 835 | + |
---|
| 836 | + hash >>= MC_HASH_SHIFT; |
---|
| 837 | + } |
---|
| 838 | + |
---|
| 839 | + /* Cache miss, do full lookup. */ |
---|
| 840 | + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit, |
---|
| 841 | + &ce->mask_index); |
---|
| 842 | + if (flow) |
---|
| 843 | + ce->skb_hash = skb_hash; |
---|
| 844 | + |
---|
| 845 | + *n_cache_hit = 0; |
---|
| 846 | + return flow; |
---|
502 | 847 | } |
---|
503 | 848 | |
---|
504 | 849 | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, |
---|
505 | 850 | const struct sw_flow_key *key) |
---|
506 | 851 | { |
---|
| 852 | + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
---|
| 853 | + struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); |
---|
507 | 854 | u32 __always_unused n_mask_hit; |
---|
| 855 | + u32 __always_unused n_cache_hit; |
---|
| 856 | + struct sw_flow *flow; |
---|
| 857 | + u32 index = 0; |
---|
508 | 858 | |
---|
509 | | - return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); |
---|
| 859 | + /* This function gets called trough the netlink interface and therefore |
---|
| 860 | + * is preemptible. However, flow_lookup() function needs to be called |
---|
| 861 | + * with BH disabled due to CPU specific variables. |
---|
| 862 | + */ |
---|
| 863 | + local_bh_disable(); |
---|
| 864 | + flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); |
---|
| 865 | + local_bh_enable(); |
---|
| 866 | + return flow; |
---|
510 | 867 | } |
---|
511 | 868 | |
---|
512 | 869 | struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, |
---|
513 | 870 | const struct sw_flow_match *match) |
---|
514 | 871 | { |
---|
515 | | - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
---|
516 | | - struct sw_flow_mask *mask; |
---|
517 | | - struct sw_flow *flow; |
---|
| 872 | + struct mask_array *ma = ovsl_dereference(tbl->mask_array); |
---|
| 873 | + int i; |
---|
518 | 874 | |
---|
519 | 875 | /* Always called under ovs-mutex. */ |
---|
520 | | - list_for_each_entry(mask, &tbl->mask_list, list) { |
---|
521 | | - flow = masked_flow_lookup(ti, match->key, mask); |
---|
| 876 | + for (i = 0; i < ma->max; i++) { |
---|
| 877 | + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); |
---|
| 878 | + u32 __always_unused n_mask_hit; |
---|
| 879 | + struct sw_flow_mask *mask; |
---|
| 880 | + struct sw_flow *flow; |
---|
| 881 | + |
---|
| 882 | + mask = ovsl_dereference(ma->masks[i]); |
---|
| 883 | + if (!mask) |
---|
| 884 | + continue; |
---|
| 885 | + |
---|
| 886 | + flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit); |
---|
522 | 887 | if (flow && ovs_identifier_is_key(&flow->id) && |
---|
523 | | - ovs_flow_cmp_unmasked_key(flow, match)) |
---|
| 888 | + ovs_flow_cmp_unmasked_key(flow, match)) { |
---|
524 | 889 | return flow; |
---|
| 890 | + } |
---|
525 | 891 | } |
---|
| 892 | + |
---|
526 | 893 | return NULL; |
---|
527 | 894 | } |
---|
528 | 895 | |
---|
.. | ..
540 | 907 | return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
541 | 908 | }
542 | 909 |
543 | | -bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
| 910 | +bool ovs_flow_cmp(const struct sw_flow *flow,
| 911 | + const struct sw_flow_match *match)
544 | 912 | {
545 | 913 | if (ovs_identifier_is_ufid(&flow->id))
546 | 914 | return flow_cmp_masked_key(flow, match->key, &match->range);
.. | ..
558 | 926 |
559 | 927 | hash = ufid_hash(ufid);
560 | 928 | head = find_bucket(ti, hash);
561 | | - hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
| 929 | + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
| 930 | + lockdep_ovsl_is_held()) {
562 | 931 | if (flow->ufid_table.hash == hash &&
563 | 932 | ovs_flow_cmp_ufid(flow, ufid))
564 | 933 | return flow;
.. | ..
568 | 937 |
569 | 938 | int ovs_flow_tbl_num_masks(const struct flow_table *table)
570 | 939 | {
571 | | - struct sw_flow_mask *mask;
572 | | - int num = 0;
| 940 | + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
| 941 | + return READ_ONCE(ma->count);
| 942 | +}
573 | 943 |
574 | | - list_for_each_entry(mask, &table->mask_list, list)
575 | | - num++;
| 944 | +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
| 945 | +{
| 946 | + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
576 | 947 |
577 | | - return num;
| 948 | + return READ_ONCE(mc->cache_size);
578 | 949 | }
579 | 950 |
580 | 951 | static struct table_instance *table_instance_expand(struct table_instance *ti,
581 | 952 | bool ufid)
582 | 953 | {
583 | 954 | return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
584 | | -}
585 | | -
586 | | -/* Remove 'mask' from the mask list, if it is not needed any more. */
587 | | -static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
588 | | -{
589 | | - if (mask) {
590 | | - /* ovs-lock is required to protect mask-refcount and
591 | | - * mask list.
592 | | - */
593 | | - ASSERT_OVSL();
594 | | - BUG_ON(!mask->ref_count);
595 | | - mask->ref_count--;
596 | | -
597 | | - if (!mask->ref_count) {
598 | | - list_del_rcu(&mask->list);
599 | | - kfree_rcu(mask, rcu);
600 | | - }
601 | | - }
602 | 955 | }
603 | 956 |
604 | 957 | /* Must be called with OVS mutex held. */
.. | ..
608 | 961 | struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
609 | 962 |
610 | 963 | BUG_ON(table->count == 0);
611 | | - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
612 | | - table->count--;
613 | | - if (ovs_identifier_is_ufid(&flow->id)) {
614 | | - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
615 | | - table->ufid_count--;
616 | | - }
617 | | -
618 | | - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
619 | | - * accessible as long as the RCU read lock is held.
620 | | - */
621 | | - flow_mask_remove(table, flow->mask);
| 964 | + table_instance_flow_free(table, ti, ufid_ti, flow);
622 | 965 | }
623 | 966 |
624 | 967 | static struct sw_flow_mask *mask_alloc(void)
.. | ..
646 | 989 | static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
647 | 990 | const struct sw_flow_mask *mask)
648 | 991 | {
649 | | - struct list_head *ml;
| 992 | + struct mask_array *ma;
| 993 | + int i;
650 | 994 |
651 | | - list_for_each(ml, &tbl->mask_list) {
652 | | - struct sw_flow_mask *m;
653 | | - m = container_of(ml, struct sw_flow_mask, list);
654 | | - if (mask_equal(mask, m))
655 | | - return m;
| 995 | + ma = ovsl_dereference(tbl->mask_array);
| 996 | + for (i = 0; i < ma->max; i++) {
| 997 | + struct sw_flow_mask *t;
| 998 | + t = ovsl_dereference(ma->masks[i]);
| 999 | +
| 1000 | + if (t && mask_equal(mask, t))
| 1001 | + return t;
656 | 1002 | }
657 | 1003 |
658 | 1004 | return NULL;
.. | ..
663 | 1009 | const struct sw_flow_mask *new)
664 | 1010 | {
665 | 1011 | struct sw_flow_mask *mask;
| 1012 | +
666 | 1013 | mask = flow_mask_find(tbl, new);
667 | 1014 | if (!mask) {
668 | 1015 | /* Allocate a new mask if none exsits. */
.. | ..
671 | 1018 | return -ENOMEM;
672 | 1019 | mask->key = new->key;
673 | 1020 | mask->range = new->range;
674 | | - list_add_rcu(&mask->list, &tbl->mask_list);
| 1021 | +
| 1022 | + /* Add mask to mask-list. */
| 1023 | + if (tbl_mask_array_add_mask(tbl, mask)) {
| 1024 | + kfree(mask);
| 1025 | + return -ENOMEM;
| 1026 | + }
675 | 1027 | } else {
676 | 1028 | BUG_ON(!mask->ref_count);
677 | 1029 | mask->ref_count++;
.. | ..
743 | 1095 | return 0;
744 | 1096 | }
745 | 1097 |
| 1098 | +static int compare_mask_and_count(const void *a, const void *b)
| 1099 | +{
| 1100 | + const struct mask_count *mc_a = a;
| 1101 | + const struct mask_count *mc_b = b;
| 1102 | +
| 1103 | + return (s64)mc_b->counter - (s64)mc_a->counter;
| 1104 | +}
| 1105 | +
| 1106 | +/* Must be called with OVS mutex held. */
| 1107 | +void ovs_flow_masks_rebalance(struct flow_table *table)
| 1108 | +{
| 1109 | + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
| 1110 | + struct mask_count *masks_and_count;
| 1111 | + struct mask_array *new;
| 1112 | + int masks_entries = 0;
| 1113 | + int i;
| 1114 | +
| 1115 | + /* Build array of all current entries with use counters. */
| 1116 | + masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
| 1117 | + GFP_KERNEL);
| 1118 | + if (!masks_and_count)
| 1119 | + return;
| 1120 | +
| 1121 | + for (i = 0; i < ma->max; i++) {
| 1122 | + struct sw_flow_mask *mask;
| 1123 | + int cpu;
| 1124 | +
| 1125 | + mask = rcu_dereference_ovsl(ma->masks[i]);
| 1126 | + if (unlikely(!mask))
| 1127 | + break;
| 1128 | +
| 1129 | + masks_and_count[i].index = i;
| 1130 | + masks_and_count[i].counter = 0;
| 1131 | +
| 1132 | + for_each_possible_cpu(cpu) {
| 1133 | + struct mask_array_stats *stats;
| 1134 | + unsigned int start;
| 1135 | + u64 counter;
| 1136 | +
| 1137 | + stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
| 1138 | + do {
| 1139 | + start = u64_stats_fetch_begin_irq(&stats->syncp);
| 1140 | + counter = stats->usage_cntrs[i];
| 1141 | + } while (u64_stats_fetch_retry_irq(&stats->syncp,
| 1142 | + start));
| 1143 | +
| 1144 | + masks_and_count[i].counter += counter;
| 1145 | + }
| 1146 | +
| 1147 | + /* Subtract the zero count value. */
| 1148 | + masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];
| 1149 | +
| 1150 | + /* Rather than calling tbl_mask_array_reset_counters()
| 1151 | + * below when no change is needed, do it inline here.
| 1152 | + */
| 1153 | + ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
| 1154 | + }
| 1155 | +
| 1156 | + if (i == 0)
| 1157 | + goto free_mask_entries;
| 1158 | +
| 1159 | + /* Sort the entries */
| 1160 | + masks_entries = i;
| 1161 | + sort(masks_and_count, masks_entries, sizeof(*masks_and_count),
| 1162 | + compare_mask_and_count, NULL);
| 1163 | +
| 1164 | + /* If the order is the same, nothing to do... */
| 1165 | + for (i = 0; i < masks_entries; i++) {
| 1166 | + if (i != masks_and_count[i].index)
| 1167 | + break;
| 1168 | + }
| 1169 | + if (i == masks_entries)
| 1170 | + goto free_mask_entries;
| 1171 | +
| 1172 | + /* Rebuilt the new list in order of usage. */
| 1173 | + new = tbl_mask_array_alloc(ma->max);
| 1174 | + if (!new)
| 1175 | + goto free_mask_entries;
| 1176 | +
| 1177 | + for (i = 0; i < masks_entries; i++) {
| 1178 | + int index = masks_and_count[i].index;
| 1179 | +
| 1180 | + if (ovsl_dereference(ma->masks[index]))
| 1181 | + new->masks[new->count++] = ma->masks[index];
| 1182 | + }
| 1183 | +
| 1184 | + rcu_assign_pointer(table->mask_array, new);
| 1185 | + call_rcu(&ma->rcu, mask_array_rcu_cb);
| 1186 | +
| 1187 | +free_mask_entries:
| 1188 | + kfree(masks_and_count);
| 1189 | +}
| 1190 | +
746 | 1191 | /* Initializes the flow module.
747 | 1192 | * Returns zero if successful or a negative error code. */
748 | 1193 | int ovs_flow_init(void)
.. | ..
752 | 1197 |
753 | 1198 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
754 | 1199 | + (nr_cpu_ids
755 | | - * sizeof(struct flow_stats *)),
| 1200 | + * sizeof(struct sw_flow_stats *)),
756 | 1201 | 0, 0, NULL);
757 | 1202 | if (flow_cache == NULL)
758 | 1203 | return -ENOMEM;
759 | 1204 |
760 | 1205 | flow_stats_cache
761 | | - = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
| 1206 | + = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
762 | 1207 | 0, SLAB_HWCACHE_ALIGN, NULL);
763 | 1208 | if (flow_stats_cache == NULL) {
764 | 1209 | kmem_cache_destroy(flow_cache);
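
The in-code comment above `ovs_flow_tbl_lookup_stats()` describes how the per-CPU mask cache is probed: the 32-bit skb_hash is consumed MC_HASH_SHIFT bits at a time, giving MC_HASH_SEGS candidate slots, and on a miss the slot with the smallest stored skb_hash is reused. The following is a minimal standalone sketch of that probing pattern only, assuming a plain userspace array instead of the kernel's per-CPU allocation; names such as `toy_cache_entry` and `probe()` are illustrative, not part of the patch.

```c
/* Sketch of the segmented mask-cache probe (userspace model, not kernel code). */
#include <stdint.h>
#include <stdio.h>

#define MC_HASH_SHIFT 8
#define MC_HASH_SEGS  ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
#define CACHE_SIZE    256 /* power of two, like MC_DEFAULT_HASH_ENTRIES */

struct toy_cache_entry {              /* hypothetical stand-in for mask_cache_entry */
	uint32_t skb_hash;            /* 0 means "empty" */
	uint32_t mask_index;          /* mask-array index to try first */
};

static struct toy_cache_entry cache[CACHE_SIZE];

/* Return the matching entry, or the chosen replacement victim on a miss. */
static struct toy_cache_entry *probe(uint32_t skb_hash, int *hit)
{
	struct toy_cache_entry *victim = NULL;
	uint32_t hash = skb_hash;
	unsigned int seg;

	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		struct toy_cache_entry *e = &cache[hash & (CACHE_SIZE - 1)];

		if (e->skb_hash == skb_hash) {
			*hit = 1;
			return e;
		}
		if (!victim || e->skb_hash < victim->skb_hash)
			victim = e;           /* lowest skb_hash is the better victim */
		hash >>= MC_HASH_SHIFT;       /* move to the next 8-bit segment */
	}
	*hit = 0;
	return victim;
}

int main(void)
{
	int hit;
	struct toy_cache_entry *e = probe(0xdeadbeef, &hit);

	/* On a miss the kernel does a full lookup and then records the result. */
	e->skb_hash = 0xdeadbeef;
	e->mask_index = 3;
	probe(0xdeadbeef, &hit);
	printf("second probe hit=%d mask_index=%u\n", hit, e->mask_index);
	return 0;
}
```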
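
`tbl_mask_array_alloc()` in the patch places the struct, the array of mask pointers, and the per-mask zero-point counters in a single allocation, with `masks_usage_zero_cntr` pointing just past the pointer array. The sketch below mirrors only that layout arithmetic in plain C, under the assumption of a simplified `toy_mask_array` type; it is not the kernel structure itself.

```c
/* Sketch of the single-allocation mask_array layout (userspace model). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_mask { int dummy; };       /* hypothetical stand-in for sw_flow_mask */

struct toy_mask_array {
	int count, max;
	uint64_t *masks_usage_zero_cntr;  /* points into the same allocation */
	struct toy_mask *masks[];         /* flexible array of 'max' pointers */
};

static struct toy_mask_array *toy_mask_array_alloc(int size)
{
	struct toy_mask_array *ma;

	/* One block: header + pointer slots + zero counters. */
	ma = calloc(1, sizeof(*ma) +
		       sizeof(struct toy_mask *) * size +
		       sizeof(uint64_t) * size);
	if (!ma)
		return NULL;

	/* Counters live immediately after the pointer array. */
	ma->masks_usage_zero_cntr =
		(uint64_t *)((uint8_t *)ma + sizeof(*ma) +
			     sizeof(struct toy_mask *) * size);
	ma->max = size;
	return ma;
}

int main(void)
{
	struct toy_mask_array *ma = toy_mask_array_alloc(16);

	if (!ma)
		return 1;
	printf("pointer slots at %p, zero counters at %p\n",
	       (void *)ma->masks, (void *)ma->masks_usage_zero_cntr);
	free(ma);
	return 0;
}
```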