.. | .. |
---|
14 | 14 | #include <linux/netfilter.h> |
---|
15 | 15 | #include <linux/netfilter/nf_tables.h> |
---|
16 | 16 | #include <net/netfilter/nf_tables_core.h> |
---|
| 17 | +#include <net/netns/generic.h> |
---|
| 18 | + |
---|
| 19 | +extern unsigned int nf_tables_net_id; |
---|
17 | 20 | |
---|
18 | 21 | struct nft_rbtree { |
---|
19 | 22 | struct rb_root root; |
---|
.. | .. |
---|
38 | 41 | return !nft_rbtree_interval_end(rbe); |
---|
39 | 42 | } |
---|
40 | 43 | |
---|
41 | | -static bool nft_rbtree_equal(const struct nft_set *set, const void *this, |
---|
42 | | - const struct nft_rbtree_elem *interval) |
---|
| 44 | +static int nft_rbtree_cmp(const struct nft_set *set, |
---|
| 45 | + const struct nft_rbtree_elem *e1, |
---|
| 46 | + const struct nft_rbtree_elem *e2) |
---|
43 | 47 | { |
---|
44 | | - return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; |
---|
| 48 | + return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), |
---|
| 49 | + set->klen); |
---|
| 50 | +} |
---|
| 51 | + |
---|
| 52 | +static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) |
---|
| 53 | +{ |
---|
| 54 | + return nft_set_elem_expired(&rbe->ext) || |
---|
| 55 | + nft_set_elem_is_dead(&rbe->ext); |
---|
45 | 56 | } |
---|
46 | 57 | |
---|
47 | 58 | static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, |
---|
.. | .. |
---|
52 | 63 | const struct nft_rbtree_elem *rbe, *interval = NULL; |
---|
53 | 64 | u8 genmask = nft_genmask_cur(net); |
---|
54 | 65 | const struct rb_node *parent; |
---|
55 | | - const void *this; |
---|
56 | 66 | int d; |
---|
57 | 67 | |
---|
58 | 68 | parent = rcu_dereference_raw(priv->root.rb_node); |
---|
.. | .. |
---|
62 | 72 | |
---|
63 | 73 | rbe = rb_entry(parent, struct nft_rbtree_elem, node); |
---|
64 | 74 | |
---|
65 | | - this = nft_set_ext_key(&rbe->ext); |
---|
66 | | - d = memcmp(this, key, set->klen); |
---|
| 75 | + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); |
---|
67 | 76 | if (d < 0) { |
---|
68 | 77 | parent = rcu_dereference_raw(parent->rb_left); |
---|
69 | 78 | if (interval && |
---|
70 | | - nft_rbtree_equal(set, this, interval) && |
---|
| 79 | + !nft_rbtree_cmp(set, rbe, interval) && |
---|
71 | 80 | nft_rbtree_interval_end(rbe) && |
---|
72 | 81 | nft_rbtree_interval_start(interval)) |
---|
73 | 82 | continue; |
---|
.. | .. |
---|
80 | 89 | continue; |
---|
81 | 90 | } |
---|
82 | 91 | |
---|
83 | | - if (nft_set_elem_expired(&rbe->ext)) |
---|
| 92 | + if (nft_rbtree_elem_expired(rbe)) |
---|
84 | 93 | return false; |
---|
85 | 94 | |
---|
86 | 95 | if (nft_rbtree_interval_end(rbe)) { |
---|
.. | .. |
---|
98 | 107 | |
---|
99 | 108 | if (set->flags & NFT_SET_INTERVAL && interval != NULL && |
---|
100 | 109 | nft_set_elem_active(&interval->ext, genmask) && |
---|
101 | | - !nft_set_elem_expired(&interval->ext) && |
---|
| 110 | + !nft_rbtree_elem_expired(interval) && |
---|
102 | 111 | nft_rbtree_interval_start(interval)) { |
---|
103 | 112 | *ext = &interval->ext; |
---|
104 | 113 | return true; |
---|
.. | .. |
---|
214 | 223 | return rbe; |
---|
215 | 224 | } |
---|
216 | 225 | |
---|
| 226 | +static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, |
---|
| 227 | + struct nft_rbtree *priv, |
---|
| 228 | + struct nft_rbtree_elem *rbe) |
---|
| 229 | +{ |
---|
| 230 | + struct nft_set_elem elem = { |
---|
| 231 | + .priv = rbe, |
---|
| 232 | + }; |
---|
| 233 | + |
---|
| 234 | + nft_setelem_data_deactivate(net, set, &elem); |
---|
| 235 | + rb_erase(&rbe->node, &priv->root); |
---|
| 236 | +} |
---|
| 237 | + |
---|
| 238 | +static const struct nft_rbtree_elem * |
---|
| 239 | +nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, |
---|
| 240 | + struct nft_rbtree_elem *rbe, u8 genmask) |
---|
| 241 | +{ |
---|
| 242 | + struct nft_set *set = (struct nft_set *)__set; |
---|
| 243 | + struct rb_node *prev = rb_prev(&rbe->node); |
---|
| 244 | + struct net *net = read_pnet(&set->net); |
---|
| 245 | + struct nft_rbtree_elem *rbe_prev; |
---|
| 246 | + struct nft_trans_gc *gc; |
---|
| 247 | + |
---|
| 248 | + gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); |
---|
| 249 | + if (!gc) |
---|
| 250 | + return ERR_PTR(-ENOMEM); |
---|
| 251 | + |
---|
| 252 | + /* search for end interval coming before this element. |
---|
| 253 | + * end intervals don't carry a timeout extension, they |
---|
| 254 | + * are coupled with the interval start element. |
---|
| 255 | + */ |
---|
| 256 | + while (prev) { |
---|
| 257 | + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); |
---|
| 258 | + if (nft_rbtree_interval_end(rbe_prev) && |
---|
| 259 | + nft_set_elem_active(&rbe_prev->ext, genmask)) |
---|
| 260 | + break; |
---|
| 261 | + |
---|
| 262 | + prev = rb_prev(prev); |
---|
| 263 | + } |
---|
| 264 | + |
---|
| 265 | + rbe_prev = NULL; |
---|
| 266 | + if (prev) { |
---|
| 267 | + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); |
---|
| 268 | + nft_rbtree_gc_remove(net, set, priv, rbe_prev); |
---|
| 269 | + |
---|
| 270 | + /* There is always room in this trans gc for this element, |
---|
| 271 | + * memory allocation never actually happens, hence, the warning |
---|
| 272 | + * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, |
---|
| 273 | + * this is synchronous gc which never fails. |
---|
| 274 | + */ |
---|
| 275 | + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); |
---|
| 276 | + if (WARN_ON_ONCE(!gc)) |
---|
| 277 | + return ERR_PTR(-ENOMEM); |
---|
| 278 | + |
---|
| 279 | + nft_trans_gc_elem_add(gc, rbe_prev); |
---|
| 280 | + } |
---|
| 281 | + |
---|
| 282 | + nft_rbtree_gc_remove(net, set, priv, rbe); |
---|
| 283 | + gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); |
---|
| 284 | + if (WARN_ON_ONCE(!gc)) |
---|
| 285 | + return ERR_PTR(-ENOMEM); |
---|
| 286 | + |
---|
| 287 | + nft_trans_gc_elem_add(gc, rbe); |
---|
| 288 | + |
---|
| 289 | + nft_trans_gc_queue_sync_done(gc); |
---|
| 290 | + |
---|
| 291 | + return rbe_prev; |
---|
| 292 | +} |
---|
| 293 | + |
---|
| 294 | +static bool nft_rbtree_update_first(const struct nft_set *set, |
---|
| 295 | + struct nft_rbtree_elem *rbe, |
---|
| 296 | + struct rb_node *first) |
---|
| 297 | +{ |
---|
| 298 | + struct nft_rbtree_elem *first_elem; |
---|
| 299 | + |
---|
| 300 | + first_elem = rb_entry(first, struct nft_rbtree_elem, node); |
---|
| 301 | + /* this element is closest to where the new element is to be inserted: |
---|
| 302 | + * update the first element for the node list path. |
---|
| 303 | + */ |
---|
| 304 | + if (nft_rbtree_cmp(set, rbe, first_elem) < 0) |
---|
| 305 | + return true; |
---|
| 306 | + |
---|
| 307 | + return false; |
---|
| 308 | +} |
---|
| 309 | + |
---|
217 | 310 | static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, |
---|
218 | 311 | struct nft_rbtree_elem *new, |
---|
219 | 312 | struct nft_set_ext **ext) |
---|
220 | 313 | { |
---|
221 | | - bool overlap = false, dup_end_left = false, dup_end_right = false; |
---|
| 314 | + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; |
---|
| 315 | + struct rb_node *node, *next, *parent, **p, *first = NULL; |
---|
222 | 316 | struct nft_rbtree *priv = nft_set_priv(set); |
---|
| 317 | + u8 cur_genmask = nft_genmask_cur(net); |
---|
223 | 318 | u8 genmask = nft_genmask_next(net); |
---|
224 | | - struct nft_rbtree_elem *rbe; |
---|
225 | | - struct rb_node *parent, **p; |
---|
226 | 319 | int d; |
---|
227 | 320 | |
---|
228 | | - /* Detect overlaps as we descend the tree. Set the flag in these cases: |
---|
229 | | - * |
---|
230 | | - * a1. _ _ __>| ?_ _ __| (insert end before existing end) |
---|
231 | | - * a2. _ _ ___| ?_ _ _>| (insert end after existing end) |
---|
232 | | - * a3. _ _ ___? >|_ _ __| (insert start before existing end) |
---|
233 | | - * |
---|
234 | | - * and clear it later on, as we eventually reach the points indicated by |
---|
235 | | - * '?' above, in the cases described below. We'll always meet these |
---|
236 | | - * later, locally, due to tree ordering, and overlaps for the intervals |
---|
237 | | - * that are the closest together are always evaluated last. |
---|
238 | | - * |
---|
239 | | - * b1. _ _ __>| !_ _ __| (insert end before existing start) |
---|
240 | | - * b2. _ _ ___| !_ _ _>| (insert end after existing start) |
---|
241 | | - * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) |
---|
242 | | - * '--' no nodes falling in this range |
---|
243 | | - * b4. >|_ _ ! (insert start before existing start) |
---|
244 | | - * |
---|
245 | | - * Case a3. resolves to b3.: |
---|
246 | | - * - if the inserted start element is the leftmost, because the '0' |
---|
247 | | - * element in the tree serves as end element |
---|
248 | | - * - otherwise, if an existing end is found immediately to the left. If |
---|
249 | | - * there are existing nodes in between, we need to further descend the |
---|
250 | | - * tree before we can conclude the new start isn't causing an overlap |
---|
251 | | - * |
---|
252 | | - * or to b4., which, preceded by a3., means we already traversed one or |
---|
253 | | - * more existing intervals entirely, from the right. |
---|
254 | | - * |
---|
255 | | - * For a new, rightmost pair of elements, we'll hit cases b3. and b2., |
---|
256 | | - * in that order. |
---|
257 | | - * |
---|
258 | | - * The flag is also cleared in two special cases: |
---|
259 | | - * |
---|
260 | | - * b5. |__ _ _!|<_ _ _ (insert start right before existing end) |
---|
261 | | - * b6. |__ _ >|!__ _ _ (insert end right after existing start) |
---|
262 | | - * |
---|
263 | | - * which always happen as last step and imply that no further |
---|
264 | | - * overlapping is possible. |
---|
265 | | - * |
---|
266 | | - * Another special case comes from the fact that start elements matching |
---|
267 | | - * an already existing start element are allowed: insertion is not |
---|
268 | | - * performed but we return -EEXIST in that case, and the error will be |
---|
269 | | - * cleared by the caller if NLM_F_EXCL is not present in the request. |
---|
270 | | - * This way, request for insertion of an exact overlap isn't reported as |
---|
271 | | - * error to userspace if not desired. |
---|
272 | | - * |
---|
273 | | - * However, if the existing start matches a pre-existing start, but the |
---|
274 | | - * end element doesn't match the corresponding pre-existing end element, |
---|
275 | | - * we need to report a partial overlap. This is a local condition that |
---|
276 | | - * can be noticed without need for a tracking flag, by checking for a |
---|
277 | | - * local duplicated end for a corresponding start, from left and right, |
---|
278 | | - * separately. |
---|
| 321 | + /* Descend the tree to search for an existing element greater than the |
---|
| 322 | + * key value to insert that is greater than the new element. This is the |
---|
| 323 | + * first element to walk the ordered elements to find possible overlap. |
---|
279 | 324 | */ |
---|
280 | | - |
---|
281 | 325 | parent = NULL; |
---|
282 | 326 | p = &priv->root.rb_node; |
---|
283 | 327 | while (*p != NULL) { |
---|
284 | 328 | parent = *p; |
---|
285 | 329 | rbe = rb_entry(parent, struct nft_rbtree_elem, node); |
---|
286 | | - d = memcmp(nft_set_ext_key(&rbe->ext), |
---|
287 | | - nft_set_ext_key(&new->ext), |
---|
288 | | - set->klen); |
---|
| 330 | + d = nft_rbtree_cmp(set, rbe, new); |
---|
| 331 | + |
---|
289 | 332 | if (d < 0) { |
---|
290 | 333 | p = &parent->rb_left; |
---|
291 | | - |
---|
292 | | - if (nft_rbtree_interval_start(new)) { |
---|
293 | | - if (nft_rbtree_interval_end(rbe) && |
---|
294 | | - nft_set_elem_active(&rbe->ext, genmask) && |
---|
295 | | - !nft_set_elem_expired(&rbe->ext) && !*p) |
---|
296 | | - overlap = false; |
---|
297 | | - } else { |
---|
298 | | - if (dup_end_left && !*p) |
---|
299 | | - return -ENOTEMPTY; |
---|
300 | | - |
---|
301 | | - overlap = nft_rbtree_interval_end(rbe) && |
---|
302 | | - nft_set_elem_active(&rbe->ext, |
---|
303 | | - genmask) && |
---|
304 | | - !nft_set_elem_expired(&rbe->ext); |
---|
305 | | - |
---|
306 | | - if (overlap) { |
---|
307 | | - dup_end_right = true; |
---|
308 | | - continue; |
---|
309 | | - } |
---|
310 | | - } |
---|
311 | 334 | } else if (d > 0) { |
---|
| 335 | + if (!first || |
---|
| 336 | + nft_rbtree_update_first(set, rbe, first)) |
---|
| 337 | + first = &rbe->node; |
---|
| 338 | + |
---|
312 | 339 | p = &parent->rb_right; |
---|
313 | | - |
---|
314 | | - if (nft_rbtree_interval_end(new)) { |
---|
315 | | - if (dup_end_right && !*p) |
---|
316 | | - return -ENOTEMPTY; |
---|
317 | | - |
---|
318 | | - overlap = nft_rbtree_interval_end(rbe) && |
---|
319 | | - nft_set_elem_active(&rbe->ext, |
---|
320 | | - genmask) && |
---|
321 | | - !nft_set_elem_expired(&rbe->ext); |
---|
322 | | - |
---|
323 | | - if (overlap) { |
---|
324 | | - dup_end_left = true; |
---|
325 | | - continue; |
---|
326 | | - } |
---|
327 | | - } else if (nft_set_elem_active(&rbe->ext, genmask) && |
---|
328 | | - !nft_set_elem_expired(&rbe->ext)) { |
---|
329 | | - overlap = nft_rbtree_interval_end(rbe); |
---|
330 | | - } |
---|
331 | 340 | } else { |
---|
332 | | - if (nft_rbtree_interval_end(rbe) && |
---|
333 | | - nft_rbtree_interval_start(new)) { |
---|
| 341 | + if (nft_rbtree_interval_end(rbe)) |
---|
334 | 342 | p = &parent->rb_left; |
---|
335 | | - |
---|
336 | | - if (nft_set_elem_active(&rbe->ext, genmask) && |
---|
337 | | - !nft_set_elem_expired(&rbe->ext)) |
---|
338 | | - overlap = false; |
---|
339 | | - } else if (nft_rbtree_interval_start(rbe) && |
---|
340 | | - nft_rbtree_interval_end(new)) { |
---|
| 343 | + else |
---|
341 | 344 | p = &parent->rb_right; |
---|
342 | | - |
---|
343 | | - if (nft_set_elem_active(&rbe->ext, genmask) && |
---|
344 | | - !nft_set_elem_expired(&rbe->ext)) |
---|
345 | | - overlap = false; |
---|
346 | | - } else if (nft_set_elem_active(&rbe->ext, genmask) && |
---|
347 | | - !nft_set_elem_expired(&rbe->ext)) { |
---|
348 | | - *ext = &rbe->ext; |
---|
349 | | - return -EEXIST; |
---|
350 | | - } else { |
---|
351 | | - overlap = false; |
---|
352 | | - if (nft_rbtree_interval_end(rbe)) |
---|
353 | | - p = &parent->rb_left; |
---|
354 | | - else |
---|
355 | | - p = &parent->rb_right; |
---|
356 | | - } |
---|
357 | 345 | } |
---|
358 | | - |
---|
359 | | - dup_end_left = dup_end_right = false; |
---|
360 | 346 | } |
---|
361 | 347 | |
---|
362 | | - if (overlap) |
---|
| 348 | + if (!first) |
---|
| 349 | + first = rb_first(&priv->root); |
---|
| 350 | + |
---|
| 351 | + /* Detect overlap by going through the list of valid tree nodes. |
---|
| 352 | + * Values stored in the tree are in reversed order, starting from |
---|
| 353 | + * highest to lowest value. |
---|
| 354 | + */ |
---|
| 355 | + for (node = first; node != NULL; node = next) { |
---|
| 356 | + next = rb_next(node); |
---|
| 357 | + |
---|
| 358 | + rbe = rb_entry(node, struct nft_rbtree_elem, node); |
---|
| 359 | + |
---|
| 360 | + if (!nft_set_elem_active(&rbe->ext, genmask)) |
---|
| 361 | + continue; |
---|
| 362 | + |
---|
| 363 | + /* perform garbage collection to avoid bogus overlap reports |
---|
| 364 | + * but skip new elements in this transaction. |
---|
| 365 | + */ |
---|
| 366 | + if (nft_set_elem_expired(&rbe->ext) && |
---|
| 367 | + nft_set_elem_active(&rbe->ext, cur_genmask)) { |
---|
| 368 | + const struct nft_rbtree_elem *removed_end; |
---|
| 369 | + |
---|
| 370 | + removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); |
---|
| 371 | + if (IS_ERR(removed_end)) |
---|
| 372 | + return PTR_ERR(removed_end); |
---|
| 373 | + |
---|
| 374 | + if (removed_end == rbe_le || removed_end == rbe_ge) |
---|
| 375 | + return -EAGAIN; |
---|
| 376 | + |
---|
| 377 | + continue; |
---|
| 378 | + } |
---|
| 379 | + |
---|
| 380 | + d = nft_rbtree_cmp(set, rbe, new); |
---|
| 381 | + if (d == 0) { |
---|
| 382 | + /* Matching end element: no need to look for an |
---|
| 383 | + * overlapping greater or equal element. |
---|
| 384 | + */ |
---|
| 385 | + if (nft_rbtree_interval_end(rbe)) { |
---|
| 386 | + rbe_le = rbe; |
---|
| 387 | + break; |
---|
| 388 | + } |
---|
| 389 | + |
---|
| 390 | + /* first element that is greater or equal to key value. */ |
---|
| 391 | + if (!rbe_ge) { |
---|
| 392 | + rbe_ge = rbe; |
---|
| 393 | + continue; |
---|
| 394 | + } |
---|
| 395 | + |
---|
| 396 | + /* this is a closer more or equal element, update it. */ |
---|
| 397 | + if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { |
---|
| 398 | + rbe_ge = rbe; |
---|
| 399 | + continue; |
---|
| 400 | + } |
---|
| 401 | + |
---|
| 402 | + /* element is equal to key value, make sure flags are |
---|
| 403 | + * the same, an existing more or equal start element |
---|
| 404 | + * must not be replaced by more or equal end element. |
---|
| 405 | + */ |
---|
| 406 | + if ((nft_rbtree_interval_start(new) && |
---|
| 407 | + nft_rbtree_interval_start(rbe_ge)) || |
---|
| 408 | + (nft_rbtree_interval_end(new) && |
---|
| 409 | + nft_rbtree_interval_end(rbe_ge))) { |
---|
| 410 | + rbe_ge = rbe; |
---|
| 411 | + continue; |
---|
| 412 | + } |
---|
| 413 | + } else if (d > 0) { |
---|
| 414 | + /* annotate element greater than the new element. */ |
---|
| 415 | + rbe_ge = rbe; |
---|
| 416 | + continue; |
---|
| 417 | + } else if (d < 0) { |
---|
| 418 | + /* annotate element less than the new element. */ |
---|
| 419 | + rbe_le = rbe; |
---|
| 420 | + break; |
---|
| 421 | + } |
---|
| 422 | + } |
---|
| 423 | + |
---|
| 424 | + /* - new start element matching existing start element: full overlap |
---|
| 425 | + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. |
---|
| 426 | + */ |
---|
| 427 | + if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && |
---|
| 428 | + nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { |
---|
| 429 | + *ext = &rbe_ge->ext; |
---|
| 430 | + return -EEXIST; |
---|
| 431 | + } |
---|
| 432 | + |
---|
| 433 | + /* - new end element matching existing end element: full overlap |
---|
| 434 | + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. |
---|
| 435 | + */ |
---|
| 436 | + if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && |
---|
| 437 | + nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { |
---|
| 438 | + *ext = &rbe_le->ext; |
---|
| 439 | + return -EEXIST; |
---|
| 440 | + } |
---|
| 441 | + |
---|
| 442 | + /* - new start element with existing closest, less or equal key value |
---|
| 443 | + * being a start element: partial overlap, reported as -ENOTEMPTY. |
---|
| 444 | + * Anonymous sets allow for two consecutive start element since they |
---|
| 445 | + * are constant, skip them to avoid bogus overlap reports. |
---|
| 446 | + */ |
---|
| 447 | + if (!nft_set_is_anonymous(set) && rbe_le && |
---|
| 448 | + nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) |
---|
363 | 449 | return -ENOTEMPTY; |
---|
| 450 | + |
---|
| 451 | + /* - new end element with existing closest, less or equal key value |
---|
| 452 | + * being a end element: partial overlap, reported as -ENOTEMPTY. |
---|
| 453 | + */ |
---|
| 454 | + if (rbe_le && |
---|
| 455 | + nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) |
---|
| 456 | + return -ENOTEMPTY; |
---|
| 457 | + |
---|
| 458 | + /* - new end element with existing closest, greater or equal key value |
---|
| 459 | + * being an end element: partial overlap, reported as -ENOTEMPTY |
---|
| 460 | + */ |
---|
| 461 | + if (rbe_ge && |
---|
| 462 | + nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) |
---|
| 463 | + return -ENOTEMPTY; |
---|
| 464 | + |
---|
| 465 | + /* Accepted element: pick insertion point depending on key value */ |
---|
| 466 | + parent = NULL; |
---|
| 467 | + p = &priv->root.rb_node; |
---|
| 468 | + while (*p != NULL) { |
---|
| 469 | + parent = *p; |
---|
| 470 | + rbe = rb_entry(parent, struct nft_rbtree_elem, node); |
---|
| 471 | + d = nft_rbtree_cmp(set, rbe, new); |
---|
| 472 | + |
---|
| 473 | + if (d < 0) |
---|
| 474 | + p = &parent->rb_left; |
---|
| 475 | + else if (d > 0) |
---|
| 476 | + p = &parent->rb_right; |
---|
| 477 | + else if (nft_rbtree_interval_end(rbe)) |
---|
| 478 | + p = &parent->rb_left; |
---|
| 479 | + else |
---|
| 480 | + p = &parent->rb_right; |
---|
| 481 | + } |
---|
364 | 482 | |
---|
365 | 483 | rb_link_node_rcu(&new->node, parent, p); |
---|
366 | 484 | rb_insert_color(&new->node, &priv->root); |
---|
.. | .. |
---|
375 | 493 | struct nft_rbtree_elem *rbe = elem->priv; |
---|
376 | 494 | int err; |
---|
377 | 495 | |
---|
378 | | - write_lock_bh(&priv->lock); |
---|
379 | | - write_seqcount_begin(&priv->count); |
---|
380 | | - err = __nft_rbtree_insert(net, set, rbe, ext); |
---|
381 | | - write_seqcount_end(&priv->count); |
---|
382 | | - write_unlock_bh(&priv->lock); |
---|
| 496 | + do { |
---|
| 497 | + if (fatal_signal_pending(current)) |
---|
| 498 | + return -EINTR; |
---|
| 499 | + |
---|
| 500 | + cond_resched(); |
---|
| 501 | + |
---|
| 502 | + write_lock_bh(&priv->lock); |
---|
| 503 | + write_seqcount_begin(&priv->count); |
---|
| 504 | + err = __nft_rbtree_insert(net, set, rbe, ext); |
---|
| 505 | + write_seqcount_end(&priv->count); |
---|
| 506 | + write_unlock_bh(&priv->lock); |
---|
| 507 | + } while (err == -EAGAIN); |
---|
383 | 508 | |
---|
384 | 509 | return err; |
---|
385 | 510 | } |
---|
.. | .. |
---|
405 | 530 | struct nft_rbtree_elem *rbe = elem->priv; |
---|
406 | 531 | |
---|
407 | 532 | nft_set_elem_change_active(net, set, &rbe->ext); |
---|
408 | | - nft_set_elem_clear_busy(&rbe->ext); |
---|
409 | 533 | } |
---|
410 | 534 | |
---|
411 | 535 | static bool nft_rbtree_flush(const struct net *net, |
---|
.. | .. |
---|
413 | 537 | { |
---|
414 | 538 | struct nft_rbtree_elem *rbe = priv; |
---|
415 | 539 | |
---|
416 | | - if (!nft_set_elem_mark_busy(&rbe->ext) || |
---|
417 | | - !nft_is_active(net, &rbe->ext)) { |
---|
418 | | - nft_set_elem_change_active(net, set, &rbe->ext); |
---|
419 | | - return true; |
---|
420 | | - } |
---|
421 | | - return false; |
---|
| 540 | + nft_set_elem_change_active(net, set, &rbe->ext); |
---|
| 541 | + |
---|
| 542 | + return true; |
---|
422 | 543 | } |
---|
423 | 544 | |
---|
424 | 545 | static void *nft_rbtree_deactivate(const struct net *net, |
---|
.. | .. |
---|
475 | 596 | |
---|
476 | 597 | if (iter->count < iter->skip) |
---|
477 | 598 | goto cont; |
---|
478 | | - if (nft_set_elem_expired(&rbe->ext)) |
---|
479 | | - goto cont; |
---|
480 | 599 | if (!nft_set_elem_active(&rbe->ext, iter->genmask)) |
---|
481 | 600 | goto cont; |
---|
482 | 601 | |
---|
.. | .. |
---|
495 | 614 | |
---|
496 | 615 | static void nft_rbtree_gc(struct work_struct *work) |
---|
497 | 616 | { |
---|
498 | | - struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; |
---|
499 | | - struct nft_set_gc_batch *gcb = NULL; |
---|
| 617 | + struct nft_rbtree_elem *rbe, *rbe_end = NULL; |
---|
| 618 | + struct nftables_pernet *nft_net; |
---|
500 | 619 | struct nft_rbtree *priv; |
---|
| 620 | + struct nft_trans_gc *gc; |
---|
501 | 621 | struct rb_node *node; |
---|
502 | 622 | struct nft_set *set; |
---|
| 623 | + unsigned int gc_seq; |
---|
| 624 | + struct net *net; |
---|
503 | 625 | |
---|
504 | 626 | priv = container_of(work, struct nft_rbtree, gc_work.work); |
---|
505 | 627 | set = nft_set_container_of(priv); |
---|
| 628 | + net = read_pnet(&set->net); |
---|
| 629 | + nft_net = net_generic(net, nf_tables_net_id); |
---|
| 630 | + gc_seq = READ_ONCE(nft_net->gc_seq); |
---|
506 | 631 | |
---|
507 | | - write_lock_bh(&priv->lock); |
---|
508 | | - write_seqcount_begin(&priv->count); |
---|
| 632 | + if (nft_set_gc_is_pending(set)) |
---|
| 633 | + goto done; |
---|
| 634 | + |
---|
| 635 | + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); |
---|
| 636 | + if (!gc) |
---|
| 637 | + goto done; |
---|
| 638 | + |
---|
| 639 | + read_lock_bh(&priv->lock); |
---|
509 | 640 | for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { |
---|
| 641 | + |
---|
| 642 | + /* Ruleset has been updated, try later. */ |
---|
| 643 | + if (READ_ONCE(nft_net->gc_seq) != gc_seq) { |
---|
| 644 | + nft_trans_gc_destroy(gc); |
---|
| 645 | + gc = NULL; |
---|
| 646 | + goto try_later; |
---|
| 647 | + } |
---|
| 648 | + |
---|
510 | 649 | rbe = rb_entry(node, struct nft_rbtree_elem, node); |
---|
511 | 650 | |
---|
| 651 | + if (nft_set_elem_is_dead(&rbe->ext)) |
---|
| 652 | + goto dead_elem; |
---|
| 653 | + |
---|
| 654 | + /* elements are reversed in the rbtree for historical reasons, |
---|
| 655 | + * from highest to lowest value, that is why end element is |
---|
| 656 | + * always visited before the start element. |
---|
| 657 | + */ |
---|
512 | 658 | if (nft_rbtree_interval_end(rbe)) { |
---|
513 | 659 | rbe_end = rbe; |
---|
514 | 660 | continue; |
---|
515 | 661 | } |
---|
516 | 662 | if (!nft_set_elem_expired(&rbe->ext)) |
---|
517 | 663 | continue; |
---|
518 | | - if (nft_set_elem_mark_busy(&rbe->ext)) |
---|
| 664 | + |
---|
| 665 | + nft_set_elem_dead(&rbe->ext); |
---|
| 666 | + |
---|
| 667 | + if (!rbe_end) |
---|
519 | 668 | continue; |
---|
520 | 669 | |
---|
521 | | - if (rbe_prev) { |
---|
522 | | - rb_erase(&rbe_prev->node, &priv->root); |
---|
523 | | - rbe_prev = NULL; |
---|
524 | | - } |
---|
525 | | - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); |
---|
526 | | - if (!gcb) |
---|
527 | | - break; |
---|
| 670 | + nft_set_elem_dead(&rbe_end->ext); |
---|
528 | 671 | |
---|
529 | | - atomic_dec(&set->nelems); |
---|
530 | | - nft_set_gc_batch_add(gcb, rbe); |
---|
531 | | - rbe_prev = rbe; |
---|
| 672 | + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); |
---|
| 673 | + if (!gc) |
---|
| 674 | + goto try_later; |
---|
532 | 675 | |
---|
533 | | - if (rbe_end) { |
---|
534 | | - atomic_dec(&set->nelems); |
---|
535 | | - nft_set_gc_batch_add(gcb, rbe_end); |
---|
536 | | - rb_erase(&rbe_end->node, &priv->root); |
---|
537 | | - rbe_end = NULL; |
---|
538 | | - } |
---|
539 | | - node = rb_next(node); |
---|
540 | | - if (!node) |
---|
541 | | - break; |
---|
| 676 | + nft_trans_gc_elem_add(gc, rbe_end); |
---|
| 677 | + rbe_end = NULL; |
---|
| 678 | +dead_elem: |
---|
| 679 | + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); |
---|
| 680 | + if (!gc) |
---|
| 681 | + goto try_later; |
---|
| 682 | + |
---|
| 683 | + nft_trans_gc_elem_add(gc, rbe); |
---|
542 | 684 | } |
---|
543 | | - if (rbe_prev) |
---|
544 | | - rb_erase(&rbe_prev->node, &priv->root); |
---|
545 | | - write_seqcount_end(&priv->count); |
---|
546 | | - write_unlock_bh(&priv->lock); |
---|
| 685 | +try_later: |
---|
| 686 | + read_unlock_bh(&priv->lock); |
---|
547 | 687 | |
---|
548 | | - nft_set_gc_batch_complete(gcb); |
---|
549 | | - |
---|
| 688 | + if (gc) |
---|
| 689 | + nft_trans_gc_queue_async_done(gc); |
---|
| 690 | +done: |
---|
550 | 691 | queue_delayed_work(system_power_efficient_wq, &priv->gc_work, |
---|
551 | 692 | nft_set_gc_interval(set)); |
---|
552 | 693 | } |
---|
.. | .. |
---|
575 | 716 | return 0; |
---|
576 | 717 | } |
---|
577 | 718 | |
---|
578 | | -static void nft_rbtree_destroy(const struct nft_set *set) |
---|
| 719 | +static void nft_rbtree_destroy(const struct nft_ctx *ctx, |
---|
| 720 | + const struct nft_set *set) |
---|
579 | 721 | { |
---|
580 | 722 | struct nft_rbtree *priv = nft_set_priv(set); |
---|
581 | 723 | struct nft_rbtree_elem *rbe; |
---|
.. | .. |
---|
586 | 728 | while ((node = priv->root.rb_node) != NULL) { |
---|
587 | 729 | rb_erase(node, &priv->root); |
---|
588 | 730 | rbe = rb_entry(node, struct nft_rbtree_elem, node); |
---|
589 | | - nft_set_elem_destroy(set, rbe, true); |
---|
| 731 | + nf_tables_set_elem_destroy(ctx, set, rbe); |
---|
590 | 732 | } |
---|
591 | 733 | } |
---|
592 | 734 | |
---|