2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/net/sched/cls_api.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_api.c	Packet classifier API.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Changes:
  *
  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
- *
  */
 
 #include <linux/module.h>
@@ -25,19 +20,88 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/jhash.h>
+#include <linux/rculist.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
-
-extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_police.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
+#include <net/flow_offload.h>
 
 /* The list of all installed classifier types */
 static LIST_HEAD(tcf_proto_base);
 
 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
+
+static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
+{
+	return jhash_3words(tp->chain->index, tp->prio,
+			    (__force __u32)tp->protocol, 0);
+}
+
+static void tcf_proto_signal_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
+		     destroy_obj_hashfn(tp));
+	mutex_unlock(&block->proto_destroy_lock);
+}
+
+static bool tcf_proto_cmp(const struct tcf_proto *tp1,
+			  const struct tcf_proto *tp2)
+{
+	return tp1->chain->index == tp2->chain->index &&
+	       tp1->prio == tp2->prio &&
+	       tp1->protocol == tp2->protocol;
+}
+
+static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	u32 hash = destroy_obj_hashfn(tp);
+	struct tcf_proto *iter;
+	bool found = false;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
+				   destroy_ht_node, hash) {
+		if (tcf_proto_cmp(tp, iter)) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+static void
+tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	if (hash_hashed(&tp->destroy_ht_node))
+		hash_del_rcu(&tp->destroy_ht_node);
+	mutex_unlock(&block->proto_destroy_lock);
+}
 
 /* Find classifier type by string name */
 
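The hashtable above lets a concurrent filter insert detect that a classifier with the same (chain index, prio, protocol) triple is still being torn down and back off with -EAGAIN (see tcf_chain_tp_insert_unique() further down). As an illustration only, a minimal userspace analogue of this mark/check/clear lifecycle, with hypothetical names and a plain mutex standing in for the kernel's RCU hashtable:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical analogue of (chain->index, tp->prio, tp->protocol). */
struct key { uint32_t chain, prio, proto; };

#define NBUCKETS 16
struct node { struct key k; struct node *next; };

static struct node *destroy_ht[NBUCKETS];
static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned int hashfn(const struct key *k)
{
	/* crude stand-in for jhash_3words() */
	return (k->chain * 31 + k->prio * 17 + k->proto) % NBUCKETS;
}

/* Mark: called before tearing an object down. */
static void signal_destroying(struct node *n)
{
	unsigned int h = hashfn(&n->k);

	pthread_mutex_lock(&destroy_lock);
	n->next = destroy_ht[h];
	destroy_ht[h] = n;
	pthread_mutex_unlock(&destroy_lock);
}

/* Check: a creator refuses to reuse a key that is still dying. */
static bool exists_destroying(const struct key *k)
{
	struct node *n;
	bool found = false;

	pthread_mutex_lock(&destroy_lock);
	for (n = destroy_ht[hashfn(k)]; n; n = n->next)
		if (n->k.chain == k->chain && n->k.prio == k->prio &&
		    n->k.proto == k->proto) {
			found = true;
			break;
		}
	pthread_mutex_unlock(&destroy_lock);
	return found;
}

/* Clear: after teardown completes (like tcf_proto_signal_destroyed()). */
static void signal_destroyed(struct node *n)
{
	struct node **pp;

	pthread_mutex_lock(&destroy_lock);
	for (pp = &destroy_ht[hashfn(&n->k)]; *pp; pp = &(*pp)->next)
		if (*pp == n) {
			*pp = n->next;
			break;
		}
	pthread_mutex_unlock(&destroy_lock);
}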
@@ -60,7 +124,8 @@
 }
 
 static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	const struct tcf_proto_ops *ops;
 
@@ -68,9 +133,11 @@
 	if (ops)
 		return ops;
 #ifdef CONFIG_MODULES
-	rtnl_unlock();
+	if (rtnl_held)
+		rtnl_unlock();
 	request_module("cls_%s", kind);
-	rtnl_lock();
+	if (rtnl_held)
+		rtnl_lock();
 	ops = __tcf_proto_lookup_ops(kind);
 	/* We dropped the RTNL semaphore in order to perform
 	 * the module load. So, even if we succeeded in loading
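The rtnl_held plumbing exists because request_module() can sleep, so the caller's lock (here RTNL) must be released around it, and after relocking, the registry has to be consulted again because the world may have changed in between. A rough userspace sketch of that drop/reacquire/revalidate shape, with hypothetical names and a pthread mutex in place of RTNL:

#include <pthread.h>
#include <stddef.h>

struct ops;					/* opaque, hypothetical */
extern pthread_mutex_t registry_lock;		/* stands in for RTNL */
extern const struct ops *registry_find(const char *kind);
extern void slow_blocking_load(const char *kind);	/* like request_module() */

const struct ops *lookup_ops(const char *kind, int lock_held)
{
	const struct ops *ops = registry_find(kind);

	if (ops)
		return ops;

	if (lock_held)
		pthread_mutex_unlock(&registry_lock);
	slow_blocking_load(kind);	/* may sleep; must not hold the lock */
	if (lock_held)
		pthread_mutex_lock(&registry_lock);

	/* Re-lookup: state may have changed while the lock was dropped. */
	return registry_find(kind);
}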
@@ -151,8 +218,37 @@
 	return TC_H_MAJ(first);
 }
 
+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{
+	if (kind)
+		return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
+	memset(name, 0, IFNAMSIZ);
+	return false;
+}
+
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+	const struct tcf_proto_ops *ops;
+	bool ret;
+
+	if (strlen(kind) == 0)
+		return false;
+
+	ops = tcf_proto_lookup_ops(kind, false, NULL);
+	/* On error return false to take rtnl lock. Proto lookup/create
+	 * functions will perform lookup again and properly handle errors.
+	 */
+	if (IS_ERR(ops))
+		return false;
+
+	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+	module_put(ops->owner);
+	return ret;
+}
+
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 					  u32 prio, struct tcf_chain *chain,
+					  bool rtnl_held,
 					  struct netlink_ext_ack *extack)
 {
 	struct tcf_proto *tp;
@@ -162,7 +258,7 @@
 	if (!tp)
 		return ERR_PTR(-ENOBUFS);
 
-	tp->ops = tcf_proto_lookup_ops(kind, extack);
+	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
 	if (IS_ERR(tp->ops)) {
 		err = PTR_ERR(tp->ops);
 		goto errout;
@@ -171,6 +267,8 @@
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	spin_lock_init(&tp->lock);
+	refcount_set(&tp->refcnt, 1);
 
 	err = tp->ops->init(tp);
 	if (err) {
@@ -184,13 +282,60 @@
 	return ERR_PTR(err);
 }
 
-static void tcf_proto_destroy(struct tcf_proto *tp,
-			      struct netlink_ext_ack *extack)
+static void tcf_proto_get(struct tcf_proto *tp)
 {
-	tp->ops->destroy(tp, extack);
+	refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
+			      bool sig_destroy, struct netlink_ext_ack *extack)
+{
+	tp->ops->destroy(tp, rtnl_held, extack);
+	if (sig_destroy)
+		tcf_proto_signal_destroyed(tp->chain, tp);
+	tcf_chain_put(tp->chain);
 	module_put(tp->ops->owner);
 	kfree_rcu(tp, rcu);
 }
+
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
+{
+	if (refcount_dec_and_test(&tp->refcnt))
+		tcf_proto_destroy(tp, rtnl_held, true, extack);
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
+{
+	if (tp->ops->delete_empty)
+		return tp->ops->delete_empty(tp);
+
+	tp->deleting = true;
+	return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	tp->deleting = true;
+	spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+	bool deleting;
+
+	spin_lock(&tp->lock);
+	deleting = tp->deleting;
+	spin_unlock(&tp->lock);
+
+	return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block)					\
+	lockdep_assert_held(&(block)->lock)
 
 struct tcf_filter_chain_list_item {
 	struct list_head list;
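With tp->refcnt, the rule becomes: take a reference before using a tcf_proto outside the chain lock, and drop it with tcf_proto_put(), which destroys the object only when the last reference goes away. A self-contained userspace analogue of this get/put discipline using C11 atomics (hypothetical names, sketch only):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcnt;
	/* ... payload ... */
};

static struct obj *obj_create(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		atomic_init(&o->refcnt, 1);	/* creator's reference */
	return o;
}

static void obj_get(struct obj *o)
{
	atomic_fetch_add_explicit(&o->refcnt, 1, memory_order_relaxed);
}

static void obj_put(struct obj *o)
{
	/* Free only when the last reference drops, mirroring
	 * refcount_dec_and_test() -> tcf_proto_destroy().
	 */
	if (atomic_fetch_sub_explicit(&o->refcnt, 1,
				      memory_order_acq_rel) == 1)
		free(o);
}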
@@ -203,10 +348,13 @@
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
-	list_add_tail(&chain->list, &block->chain_list);
+	list_add_tail_rcu(&chain->list, &block->chain_list);
+	mutex_init(&chain->filter_chain_lock);
 	chain->block = block;
 	chain->index = chain_index;
 	chain->refcnt = 1;
@@ -230,29 +378,60 @@
 
 	if (chain->index)
 		return;
+
+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
 		tcf_chain_head_change_item(item, tp_head);
+	mutex_unlock(&block->lock);
 }
 
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;
 
-	list_del(&chain->list);
+	ASSERT_BLOCK_LOCKED(block);
+
+	list_del_rcu(&chain->list);
 	if (!chain->index)
 		block->chain0.chain = NULL;
-	kfree(chain);
-	if (list_empty(&block->chain_list) && block->refcnt == 0)
-		kfree(block);
+
+	if (list_empty(&block->chain_list) &&
+	    refcount_read(&block->refcnt) == 0)
+		return true;
+
+	return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+	mutex_destroy(&block->lock);
+	mutex_destroy(&block->proto_destroy_lock);
+	kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_destroy(&chain->filter_chain_lock);
+	kfree_rcu(chain, rcu);
+	if (free_block)
+		tcf_block_destroy(block);
 }
 
 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	++chain->refcnt;
 }
 
 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	/* In case all the references are action references, this
 	 * chain should not be shown to the user.
 	 */
@@ -264,12 +443,28 @@
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index)
 			return chain;
 	}
 	return NULL;
 }
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
+					      u32 chain_index)
+{
+	struct tcf_chain *chain;
+
+	list_for_each_entry_rcu(chain, &block->chain_list, list) {
+		if (chain->index == chain_index)
+			return chain;
+	}
+	return NULL;
+}
+#endif
 
 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
 			   u32 seq, u16 flags, int event, bool unicast);
@@ -278,30 +473,39 @@
 			 u32 chain_index, bool create,
 			 bool by_act)
 {
-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+	struct tcf_chain *chain = NULL;
+	bool is_first_reference;
 
+	mutex_lock(&block->lock);
+	chain = tcf_chain_lookup(block, chain_index);
 	if (chain) {
 		tcf_chain_hold(chain);
 	} else {
 		if (!create)
-			return NULL;
+			goto errout;
 		chain = tcf_chain_create(block, chain_index);
 		if (!chain)
-			return NULL;
+			goto errout;
 	}
 
 	if (by_act)
 		++chain->action_refcnt;
+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+	mutex_unlock(&block->lock);
 
 	/* Send notification only in case we got the first
 	 * non-action reference. Until then, the chain acts only as
 	 * a placeholder for actions pointing to it and user ought
 	 * not know about them.
 	 */
-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+	if (is_first_reference && !by_act)
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
 				RTM_NEWCHAIN, false);
 
+	return chain;
+
+errout:
+	mutex_unlock(&block->lock);
 	return chain;
 }
 
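Note the shape of the function above: the refcount bookkeeping happens under block->lock, but the decision ("is this the first non-action reference?") is captured in a local variable and the netlink notification is sent only after the mutex is dropped, since tc_chain_notify() may sleep. A generic userspace sketch of that pattern (hypothetical names):

#include <pthread.h>
#include <stdbool.h>

struct counted {
	pthread_mutex_t lock;
	unsigned int refcnt;
};

extern void send_notification(struct counted *c);	/* may block */

void take_ref_and_maybe_notify(struct counted *c)
{
	bool is_first;

	pthread_mutex_lock(&c->lock);
	is_first = (++c->refcnt == 1);	/* decide under the lock */
	pthread_mutex_unlock(&c->lock);

	/* Act on the snapshot outside the lock; c->refcnt itself may
	 * already have changed, but the "first reference" event is
	 * still delivered exactly once.
	 */
	if (is_first)
		send_notification(c);
}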
@@ -317,72 +521,182 @@
 }
 EXPORT_SYMBOL(tcf_chain_get_by_act);
 
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+			       void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+				  void *tmplt_priv, u32 chain_index,
+				  struct tcf_block *block, struct sk_buff *oskb,
+				  u32 seq, u16 flags, bool unicast);
 
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+			    bool explicitly_created)
 {
+	struct tcf_block *block = chain->block;
+	const struct tcf_proto_ops *tmplt_ops;
+	unsigned int refcnt, non_act_refcnt;
+	bool free_block = false;
+	void *tmplt_priv;
+
+	mutex_lock(&block->lock);
+	if (explicitly_created) {
+		if (!chain->explicitly_created) {
+			mutex_unlock(&block->lock);
+			return;
+		}
+		chain->explicitly_created = false;
+	}
+
 	if (by_act)
 		chain->action_refcnt--;
-	chain->refcnt--;
 
-	/* The last dropped non-action reference will trigger notification. */
-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
-		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+	/* tc_chain_notify_delete can't be called while holding block lock.
+	 * However, when block is unlocked chain can be changed concurrently, so
+	 * save these to temporary variables.
+	 */
+	refcnt = --chain->refcnt;
+	non_act_refcnt = refcnt - chain->action_refcnt;
+	tmplt_ops = chain->tmplt_ops;
+	tmplt_priv = chain->tmplt_priv;
 
-	if (chain->refcnt == 0) {
-		tc_chain_tmplt_del(chain);
-		tcf_chain_destroy(chain);
+	if (non_act_refcnt == chain->explicitly_created && !by_act) {
+		if (non_act_refcnt == 0)
+			tc_chain_notify_delete(tmplt_ops, tmplt_priv,
+					       chain->index, block, NULL, 0, 0,
+					       false);
+		/* Last reference to chain, no need to lock. */
+		chain->flushing = false;
+	}
+
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
+
+	if (refcnt == 0) {
+		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+		tcf_chain_destroy(chain, free_block);
 	}
 }
 
 static void tcf_chain_put(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, false);
+	__tcf_chain_put(chain, false, false);
 }
 
 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, true);
+	__tcf_chain_put(chain, true, false);
 }
 EXPORT_SYMBOL(tcf_chain_put_by_act);
 
 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
-	if (chain->explicitly_created)
-		tcf_chain_put(chain);
+	__tcf_chain_put(chain, false, true);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 {
-	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+	struct tcf_proto *tp, *tp_next;
 
-	tcf_chain0_head_change(chain, NULL);
+	mutex_lock(&chain->filter_chain_lock);
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
 	while (tp) {
-		RCU_INIT_POINTER(chain->filter_chain, tp->next);
-		tcf_proto_destroy(tp, NULL);
-		tp = rtnl_dereference(chain->filter_chain);
-		tcf_chain_put(chain);
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_signal_destroying(chain, tp);
+		tp = tp_next;
 	}
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
+	RCU_INIT_POINTER(chain->filter_chain, NULL);
+	tcf_chain0_head_change(chain, NULL);
+	chain->flushing = true;
+	mutex_unlock(&chain->filter_chain_lock);
+
+	while (tp) {
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_put(tp, rtnl_held, NULL);
+		tp = tp_next;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo);
+
+static void tcf_block_offload_init(struct flow_block_offload *bo,
+				   struct net_device *dev, struct Qdisc *sch,
+				   enum flow_block_command command,
+				   enum flow_block_binder_type binder_type,
+				   struct flow_block *flow_block,
+				   bool shared, struct netlink_ext_ack *extack)
+{
+	bo->net = dev_net(dev);
+	bo->command = command;
+	bo->binder_type = binder_type;
+	bo->block = flow_block;
+	bo->block_shared = shared;
+	bo->extack = extack;
+	bo->sch = sch;
+	bo->cb_list_head = &flow_block->cb_list;
+	INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo);
+
+static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
+{
+	struct tcf_block *block = block_cb->indr.data;
+	struct net_device *dev = block_cb->indr.dev;
+	struct Qdisc *sch = block_cb->indr.sch;
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
+			       block_cb->indr.binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       &extack);
+	rtnl_lock();
+	down_write(&block->cb_lock);
+	list_del(&block_cb->driver_list);
+	list_move(&block_cb->list, &bo.cb_list);
+	tcf_block_unbind(block, &bo);
+	up_write(&block->cb_lock);
+	rtnl_unlock();
 }
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
-	return block->offloadcnt;
+	return atomic_read(&block->offloadcnt);
 }
 
 static int tcf_block_offload_cmd(struct tcf_block *block,
-				 struct net_device *dev,
+				 struct net_device *dev, struct Qdisc *sch,
 				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command,
+				 enum flow_block_command command,
 				 struct netlink_ext_ack *extack)
 {
-	struct tc_block_offload bo = {};
+	struct flow_block_offload bo = {};
 
-	bo.command = command;
-	bo.binder_type = ei->binder_type;
-	bo.block = block;
-	bo.extack = extack;
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       extack);
+
+	if (dev->netdev_ops->ndo_setup_tc) {
+		int err;
+
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+		if (err < 0) {
+			if (err != -EOPNOTSUPP)
+				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
+			return err;
+		}
+
+		return tcf_block_setup(block, &bo);
+	}
+
+	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
+				    tc_block_indr_cleanup);
+	tcf_block_setup(block, &bo);
+
+	return -EOPNOTSUPP;
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
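tcf_block_offload_cmd() now routes the bind/unbind through a struct flow_block_offload, which a driver consumes in its ndo_setup_tc() handler. A hedged sketch of the driver side using the generic flow_block_cb_setup_simple() helper; the foo_* names and the flower-handling function are hypothetical placeholders (a real in-tree driver should be consulted for the full pattern):

/* Hypothetical driver-side handler; foo_* names are placeholders. */
static LIST_HEAD(foo_block_cb_list);

static int foo_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct foo_priv *priv = cb_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return foo_handle_flower(priv, type_data);	/* hypothetical */
	default:
		return -EOPNOTSUPP;
	}
}

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_BLOCK:
		/* Registers/unregisters the callback on FLOW_BLOCK_BIND /
		 * FLOW_BLOCK_UNBIND and lets the core replay existing rules.
		 */
		return flow_block_cb_setup_simple(type_data,
						  &foo_block_cb_list,
						  foo_setup_tc_block_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}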
@@ -392,27 +706,37 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_inc;
+	down_write(&block->cb_lock);
 
 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+	if (dev->netdev_ops->ndo_setup_tc &&
+	    !tc_can_offload(dev) &&
+	    tcf_block_offload_in_use(block)) {
 		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_unlock;
 	}
 
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
+	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
-	return err;
+	if (err)
+		goto err_unlock;
+
+	up_write(&block->cb_lock);
+	return 0;
 
 no_offload_dev_inc:
 	if (tcf_block_offload_in_use(block))
-		return -EOPNOTSUPP;
+		goto err_unlock;
+
+	err = 0;
 	block->nooffloaddevcnt++;
-	return 0;
+err_unlock:
+	up_write(&block->cb_lock);
+	return err;
 }
 
 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -421,15 +745,16 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
+	down_write(&block->cb_lock);
+	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
+	up_write(&block->cb_lock);
 	return;
 
 no_offload_dev_dec:
 	WARN_ON(block->nooffloaddevcnt-- == 0);
+	up_write(&block->cb_lock);
 }
 
 static int
@@ -437,8 +762,8 @@
 		      struct tcf_block_ext_info *ei,
 		      struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
+	struct tcf_chain *chain0;
 
 	item = kmalloc(sizeof(*item), GFP_KERNEL);
 	if (!item) {
@@ -447,9 +772,32 @@
 	}
 	item->chain_head_change = ei->chain_head_change;
 	item->chain_head_change_priv = ei->chain_head_change_priv;
-	if (chain0 && chain0->filter_chain)
-		tcf_chain_head_change_item(item, chain0->filter_chain);
-	list_add(&item->list, &block->chain0.filter_chain_list);
+
+	mutex_lock(&block->lock);
+	chain0 = block->chain0.chain;
+	if (chain0)
+		tcf_chain_hold(chain0);
+	else
+		list_add(&item->list, &block->chain0.filter_chain_list);
+	mutex_unlock(&block->lock);
+
+	if (chain0) {
+		struct tcf_proto *tp_head;
+
+		mutex_lock(&chain0->filter_chain_lock);
+
+		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+		if (tp_head)
+			tcf_chain_head_change_item(item, tp_head);
+
+		mutex_lock(&block->lock);
+		list_add(&item->list, &block->chain0.filter_chain_list);
+		mutex_unlock(&block->lock);
+
+		mutex_unlock(&chain0->filter_chain_lock);
+		tcf_chain_put(chain0);
+	}
+
 	return 0;
 }
 
@@ -457,24 +805,28 @@
 tcf_chain0_head_change_cb_del(struct tcf_block *block,
 			      struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
 
+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
 		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
 		    (item->chain_head_change == ei->chain_head_change &&
 		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-			if (chain0)
+			if (block->chain0.chain)
 				tcf_chain_head_change_item(item, NULL);
 			list_del(&item->list);
+			mutex_unlock(&block->lock);
+
 			kfree(item);
 			return;
 		}
 	}
+	mutex_unlock(&block->lock);
 	WARN_ON(1);
 }
 
 struct tcf_net {
+	spinlock_t idr_lock; /* Protects idr */
 	struct idr idr;
 };
 
@@ -484,16 +836,25 @@
 			    struct netlink_ext_ack *extack)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
+	int err;
 
-	return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
-			     GFP_KERNEL);
+	idr_preload(GFP_KERNEL);
+	spin_lock(&tn->idr_lock);
+	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+			    GFP_NOWAIT);
+	spin_unlock(&tn->idr_lock);
+	idr_preload_end();
+
+	return err;
 }
 
 static void tcf_block_remove(struct tcf_block *block, struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
 
+	spin_lock(&tn->idr_lock);
 	idr_remove(&tn->idr, block->index);
+	spin_unlock(&tn->idr_lock);
 }
 
 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
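The insert above follows the standard recipe for allocating IDs under a spinlock: idr_alloc_u32() with GFP_KERNEL may sleep, which is illegal while holding tn->idr_lock, so memory is preallocated with idr_preload(GFP_KERNEL) and the allocation itself runs with GFP_NOWAIT. The same recipe in generic form (a sketch of the pattern, not a new API):

static int id_alloc_locked(struct idr *idr, spinlock_t *lock,
			   void *ptr, u32 *index)
{
	int err;

	idr_preload(GFP_KERNEL);	/* may sleep; lock not yet held */
	spin_lock(lock);
	err = idr_alloc_u32(idr, ptr, index, *index, GFP_NOWAIT);
	spin_unlock(lock);
	idr_preload_end();

	return err;
}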
@@ -507,12 +868,15 @@
 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
 		return ERR_PTR(-ENOMEM);
 	}
+	mutex_init(&block->lock);
+	mutex_init(&block->proto_destroy_lock);
+	init_rwsem(&block->cb_lock);
+	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
-	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
 	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
-	block->refcnt = 1;
+	refcount_set(&block->refcnt, 1);
 	block->net = net;
 	block->index = block_index;
 
@@ -529,6 +893,301 @@
 	return idr_find(&tn->idr, block_index);
 }
 
+static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
+{
+	struct tcf_block *block;
+
+	rcu_read_lock();
+	block = tcf_block_lookup(net, block_index);
+	if (block && !refcount_inc_not_zero(&block->refcnt))
+		block = NULL;
+	rcu_read_unlock();
+
+	return block;
+}
+
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	mutex_lock(&block->lock);
+	if (chain)
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+	else
+		chain = list_first_entry_or_null(&block->chain_list,
+						 struct tcf_chain, list);
+
+	/* skip all action-only chains */
+	while (chain && tcf_chain_held_by_acts_only(chain))
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+
+	if (chain)
+		tcf_chain_hold(chain);
+	mutex_unlock(&block->lock);
+
+	return chain;
+}
+
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
+
+	if (chain)
+		tcf_chain_put(chain);
+
+	return chain_next;
+}
+EXPORT_SYMBOL(tcf_get_next_chain);
+
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	u32 prio = 0;
+
+	ASSERT_RTNL();
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (!tp) {
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+	} else if (tcf_proto_is_deleting(tp)) {
+		/* 'deleting' flag is set and chain->filter_chain_lock was
+		 * unlocked, which means next pointer could be invalid. Restart
+		 * search.
+		 */
+		prio = tp->prio + 1;
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+
+		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+			if (!tp->deleting && tp->prio >= prio)
+				break;
+	} else {
+		tp = tcf_chain_dereference(tp->next, chain);
+	}
+
+	if (tp)
+		tcf_proto_get(tp);
+
+	mutex_unlock(&chain->filter_chain_lock);
+
+	return tp;
+}
+
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+		   bool rtnl_held)
+{
+	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+	if (tp)
+		tcf_proto_put(tp, rtnl_held, NULL);
+
+	return tp_next;
+}
+EXPORT_SYMBOL(tcf_get_next_proto);
+
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+	struct tcf_chain *chain;
+
+	/* Last reference to block. At this point chains cannot be added or
+	 * removed concurrently.
+	 */
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
+		tcf_chain_put_explicitly_created(chain);
+		tcf_chain_flush(chain, rtnl_held);
+	}
+}
+
+/* Lookup Qdisc and increments its reference counter.
+ * Set parent, if necessary.
+ */
+
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+			    u32 *parent, int ifindex, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
+{
+	const struct Qdisc_class_ops *cops;
+	struct net_device *dev;
+	int err = 0;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	rcu_read_lock();
+
+	/* Find link */
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	/* Find qdisc */
+	if (!*parent) {
+		*q = rcu_dereference(dev->qdisc);
+		*parent = (*q)->handle;
+	} else {
+		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
+		if (!*q) {
+			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+			err = -EINVAL;
+			goto errout_rcu;
+		}
+	}
+
+	*q = qdisc_refcount_inc_nz(*q);
+	if (!*q) {
+		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+		err = -EINVAL;
+		goto errout_rcu;
+	}
+
+	/* Is it classful? */
+	cops = (*q)->ops->cl_ops;
+	if (!cops) {
+		NL_SET_ERR_MSG(extack, "Qdisc not classful");
+		err = -EINVAL;
+		goto errout_qdisc;
+	}
+
+	if (!cops->tcf_block) {
+		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+		err = -EOPNOTSUPP;
+		goto errout_qdisc;
+	}
+
+errout_rcu:
+	/* At this point we know that qdisc is not noop_qdisc,
+	 * which means that qdisc holds a reference to net_device
+	 * and we hold a reference to qdisc, so it is safe to release
+	 * rcu read lock.
+	 */
+	rcu_read_unlock();
+	return err;
+
+errout_qdisc:
+	rcu_read_unlock();
+
+	if (rtnl_held)
+		qdisc_put(*q);
+	else
+		qdisc_put_unlocked(*q);
+	*q = NULL;
+
+	return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+			       int ifindex, struct netlink_ext_ack *extack)
+{
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	/* Do we search for filter, attached to class? */
+	if (TC_H_MIN(parent)) {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		*cl = cops->find(q, parent);
+		if (*cl == 0) {
+			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+					  unsigned long cl, int ifindex,
+					  u32 block_index,
+					  struct netlink_ext_ack *extack)
+{
+	struct tcf_block *block;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+		block = tcf_block_refcnt_get(net, block_index);
+		if (!block) {
+			NL_SET_ERR_MSG(extack, "Block of given index was not found");
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		block = cops->tcf_block(q, cl, extack);
+		if (!block)
+			return ERR_PTR(-EINVAL);
+
+		if (tcf_block_shared(block)) {
+			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+
+		/* Always take reference to block in order to support execution
+		 * of rules update path of cls API without rtnl lock. Caller
+		 * must release block when it is finished using it. 'if' block
+		 * of this conditional obtain reference to block by calling
+		 * tcf_block_refcnt_get().
+		 */
+		refcount_inc(&block->refcnt);
+	}
+
+	return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+			    struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+		/* Flushing/putting all chains will cause the block to be
+		 * deallocated when last chain is freed. However, if chain_list
+		 * is empty, block has to be manually deallocated. After block
+		 * reference counter reached 0, it is no longer possible to
+		 * increment it or add new chains to block.
+		 */
+		bool free_block = list_empty(&block->chain_list);
+
+		mutex_unlock(&block->lock);
+		if (tcf_block_shared(block))
+			tcf_block_remove(block, block->net);
+
+		if (q)
+			tcf_block_offload_unbind(block, q, ei);
+
+		if (free_block)
+			tcf_block_destroy(block);
+		else
+			tcf_block_flush_all_chains(block, rtnl_held);
+	} else if (q) {
+		tcf_block_offload_unbind(block, q, ei);
+	}
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+	__tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
 /* Find tcf block.
  * Set q, parent, cl when appropriate.
  */
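The comments in the hunk above spell out the calling convention of the new iterators: pass NULL to start, pass the previous element to advance, and each helper both takes the next reference and drops the previous one, so a loop that simply runs to completion leaks nothing. The intended usage, mirroring tcf_block_flush_all_chains() above (sketch only):

	struct tcf_chain *chain;
	struct tcf_proto *tp;

	for (chain = tcf_get_next_chain(block, NULL); chain;
	     chain = tcf_get_next_chain(block, chain)) {
		for (tp = tcf_get_next_proto(chain, NULL, true); tp;
		     tp = tcf_get_next_proto(chain, tp, true)) {
			/* inspect tp; references are managed by the
			 * iterators themselves
			 */
		}
	}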
@@ -541,121 +1200,60 @@
 	struct tcf_block *block;
 	int err = 0;
 
-	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-		block = tcf_block_lookup(net, block_index);
-		if (!block) {
-			NL_SET_ERR_MSG(extack, "Block of given index was not found");
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		const struct Qdisc_class_ops *cops;
-		struct net_device *dev;
+	ASSERT_RTNL();
 
-		rcu_read_lock();
+	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+	if (err)
+		goto errout;
 
-		/* Find link */
-		dev = dev_get_by_index_rcu(net, ifindex);
-		if (!dev) {
-			rcu_read_unlock();
-			return ERR_PTR(-ENODEV);
-		}
+	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+	if (err)
+		goto errout_qdisc;
 
-		/* Find qdisc */
-		if (!*parent) {
-			*q = dev->qdisc;
-			*parent = (*q)->handle;
-		} else {
-			*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
-			if (!*q) {
-				NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-				err = -EINVAL;
-				goto errout_rcu;
-			}
-		}
-
-		*q = qdisc_refcount_inc_nz(*q);
-		if (!*q) {
-			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-			err = -EINVAL;
-			goto errout_rcu;
-		}
-
-		/* Is it classful? */
-		cops = (*q)->ops->cl_ops;
-		if (!cops) {
-			NL_SET_ERR_MSG(extack, "Qdisc not classful");
-			err = -EINVAL;
-			goto errout_rcu;
-		}
-
-		if (!cops->tcf_block) {
-			NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
-			err = -EOPNOTSUPP;
-			goto errout_rcu;
-		}
-
-		/* At this point we know that qdisc is not noop_qdisc,
-		 * which means that qdisc holds a reference to net_device
-		 * and we hold a reference to qdisc, so it is safe to release
-		 * rcu read lock.
-		 */
-		rcu_read_unlock();
-
-		/* Do we search for filter, attached to class? */
-		if (TC_H_MIN(*parent)) {
-			*cl = cops->find(*q, *parent);
-			if (*cl == 0) {
-				NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
-				err = -ENOENT;
-				goto errout_qdisc;
-			}
-		}
-
-		/* And the last stroke */
-		block = cops->tcf_block(*q, *cl, extack);
-		if (!block) {
-			err = -EINVAL;
-			goto errout_qdisc;
-		}
-		if (tcf_block_shared(block)) {
-			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
-			err = -EOPNOTSUPP;
-			goto errout_qdisc;
-		}
+	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+	if (IS_ERR(block)) {
+		err = PTR_ERR(block);
+		goto errout_qdisc;
 	}
 
 	return block;
 
-errout_rcu:
-	rcu_read_unlock();
 errout_qdisc:
-	if (*q) {
+	if (*q)
 		qdisc_put(*q);
-		*q = NULL;
-	}
+errout:
+	*q = NULL;
 	return ERR_PTR(err);
 }
 
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+			      bool rtnl_held)
 {
-	if (q)
-		qdisc_put(q);
+	if (!IS_ERR_OR_NULL(block))
+		tcf_block_refcnt_put(block, rtnl_held);
+
+	if (q) {
+		if (rtnl_held)
+			qdisc_put(q);
+		else
+			qdisc_put_unlocked(q);
+	}
 }
 
 struct tcf_block_owner_item {
 	struct list_head list;
 	struct Qdisc *q;
-	enum tcf_block_binder_type binder_type;
+	enum flow_block_binder_type binder_type;
 };
 
 static void
 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
 			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	if (block->keep_dst &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		netif_keep_dst(qdisc_dev(q));
@@ -672,7 +1270,7 @@
 
 static int tcf_block_owner_add(struct tcf_block *block,
 			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;
 
@@ -687,7 +1285,7 @@
 
 static void tcf_block_owner_del(struct tcf_block *block,
 				struct Qdisc *q,
-				enum tcf_block_binder_type binder_type)
+				enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;
 
@@ -707,21 +1305,16 @@
 {
 	struct net *net = qdisc_net(q);
 	struct tcf_block *block = NULL;
-	bool created = false;
 	int err;
 
-	if (ei->block_index) {
+	if (ei->block_index)
 		/* block_index not 0 means the shared block is requested */
-		block = tcf_block_lookup(net, ei->block_index);
-		if (block)
-			block->refcnt++;
-	}
+		block = tcf_block_refcnt_get(net, ei->block_index);
 
 	if (!block) {
 		block = tcf_block_create(net, q, ei->block_index, extack);
 		if (IS_ERR(block))
 			return PTR_ERR(block);
-		created = true;
 		if (tcf_block_shared(block)) {
 			err = tcf_block_insert(block, net, extack);
 			if (err)
@@ -751,14 +1344,8 @@
 err_chain0_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
-	if (created) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, net);
 err_block_insert:
-		kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	tcf_block_refcnt_put(block, true);
 	return err;
 }
 EXPORT_SYMBOL(tcf_block_get_ext);
@@ -790,42 +1377,12 @@
 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 		       struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain, *tmp;
-
 	if (!block)
 		return;
 	tcf_chain0_head_change_cb_del(block, ei);
 	tcf_block_owner_del(block, q, ei->binder_type);
 
-	if (block->refcnt == 1) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, block->net);
-
-		/* Hold a refcnt for all chains, so that they don't disappear
-		 * while we are iterating.
-		 */
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_hold(chain);
-
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_flush(chain);
-	}
-
-	tcf_block_offload_unbind(block, q, ei);
-
-	if (block->refcnt == 1) {
-		/* At this point, all the chains should have refcnt >= 1. */
-		list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-			tcf_chain_put_explicitly_created(chain);
-			tcf_chain_put(chain);
-		}
-
-		block->refcnt--;
-		if (list_empty(&block->chain_list))
-			kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	__tcf_block_put(block, q, ei, true);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
 
@@ -840,55 +1397,26 @@
 
 EXPORT_SYMBOL(tcf_block_put);
 
-struct tcf_block_cb {
-	struct list_head list;
-	tc_setup_cb_t *cb;
-	void *cb_ident;
-	void *cb_priv;
-	unsigned int refcnt;
-};
-
-void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
-{
-	return block_cb->cb_priv;
-}
-EXPORT_SYMBOL(tcf_block_cb_priv);
-
-struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
-					 tc_setup_cb_t *cb, void *cb_ident)
-{	struct tcf_block_cb *block_cb;
-
-	list_for_each_entry(block_cb, &block->cb_list, list)
-		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
-			return block_cb;
-	return NULL;
-}
-EXPORT_SYMBOL(tcf_block_cb_lookup);
-
-void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
-{
-	block_cb->refcnt++;
-}
-EXPORT_SYMBOL(tcf_block_cb_incref);
-
-unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
-{
-	return --block_cb->refcnt;
-}
-EXPORT_SYMBOL(tcf_block_cb_decref);
-
 static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain;
-	struct tcf_proto *tp;
+	struct tcf_chain *chain, *chain_prev;
+	struct tcf_proto *tp, *tp_prev;
 	int err;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
-		for (tp = rtnl_dereference(chain->filter_chain); tp;
-		     tp = rtnl_dereference(tp->next)) {
+	lockdep_assert_held(&block->cb_lock);
+
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
+		for (tp = __tcf_get_next_proto(chain, NULL); tp;
+		     tp_prev = tp,
+			     tp = __tcf_get_next_proto(chain, tp),
+			     tcf_proto_put(tp_prev, true, NULL)) {
 			if (tp->ops->reoffload) {
 				err = tp->ops->reoffload(tp, add, cb, cb_priv,
 							 extack);
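tcf_block_playback_offloads() walks every classifier on every chain and invokes its ->reoffload() hook, so a newly bound callback receives all rules that already exist (add == true) and loses them again on unbind (add == false). The callback it replays into has the flow_setup_cb_t signature; a minimal receiver could look like this (foo_* names are hypothetical; sketch only):

static int foo_block_cb(enum tc_setup_type type, void *type_data,
			void *cb_priv)
{
	switch (type) {
	case TC_SETUP_CLSFLOWER: {
		struct flow_cls_offload *f = type_data;

		switch (f->command) {
		case FLOW_CLS_REPLACE:	/* replayed with add == true */
			return foo_install_rule(cb_priv, f);	/* hypothetical */
		case FLOW_CLS_DESTROY:	/* replayed with add == false */
			foo_remove_rule(cb_priv, f);		/* hypothetical */
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}
	default:
		return -EOPNOTSUPP;
	}
}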
@@ -905,105 +1433,108 @@
 	return 0;
 
 err_playback_remove:
+	tcf_proto_put(tp, true, NULL);
+	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
 	return err;
 }
 
-struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
-					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv,
-					     struct netlink_ext_ack *extack)
+static int tcf_block_bind(struct tcf_block *block,
+			  struct flow_block_offload *bo)
 {
-	struct tcf_block_cb *block_cb;
-	int err;
+	struct flow_block_cb *block_cb, *next;
+	int err, i = 0;
 
-	/* Replay any already present rules */
-	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
-					  tcf_block_offload_in_use(block),
-					  extack);
-	if (err)
-		return ERR_PTR(err);
+	lockdep_assert_held(&block->cb_lock);
 
-	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
-	if (!block_cb)
-		return ERR_PTR(-ENOMEM);
-	block_cb->cb = cb;
-	block_cb->cb_ident = cb_ident;
-	block_cb->cb_priv = cb_priv;
-	list_add(&block_cb->list, &block->cb_list);
-	return block_cb;
-}
-EXPORT_SYMBOL(__tcf_block_cb_register);
+	list_for_each_entry(block_cb, &bo->cb_list, list) {
+		err = tcf_block_playback_offloads(block, block_cb->cb,
+						  block_cb->cb_priv, true,
+						  tcf_block_offload_in_use(block),
+						  bo->extack);
+		if (err)
+			goto err_unroll;
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt++;
 
-int tcf_block_cb_register(struct tcf_block *block,
-			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv, struct netlink_ext_ack *extack)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
-					   extack);
-	return PTR_ERR_OR_ZERO(block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_register);
-
-void __tcf_block_cb_unregister(struct tcf_block *block,
-			       struct tcf_block_cb *block_cb)
-{
-	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
-				    false, tcf_block_offload_in_use(block),
-				    NULL);
-	list_del(&block_cb->list);
-	kfree(block_cb);
-}
-EXPORT_SYMBOL(__tcf_block_cb_unregister);
-
-void tcf_block_cb_unregister(struct tcf_block *block,
-			     tc_setup_cb_t *cb, void *cb_ident)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
-	if (!block_cb)
-		return;
-	__tcf_block_cb_unregister(block, block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_unregister);
-
-static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
-			     void *type_data, bool err_stop)
-{
-	struct tcf_block_cb *block_cb;
-	int ok_count = 0;
-	int err;
-
-	/* Make sure all netdevs sharing this block are offload-capable. */
-	if (block->nooffloaddevcnt && err_stop)
-		return -EOPNOTSUPP;
-
-	list_for_each_entry(block_cb, &block->cb_list, list) {
-		err = block_cb->cb(type, type_data, block_cb->cb_priv);
-		if (err) {
-			if (err_stop)
-				return err;
-		} else {
-			ok_count++;
-		}
+		i++;
 	}
-	return ok_count;
+	list_splice(&bo->cb_list, &block->flow_block.cb_list);
+
+	return 0;
+
+err_unroll:
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		list_del(&block_cb->driver_list);
+		if (i-- > 0) {
+			list_del(&block_cb->list);
+			tcf_block_playback_offloads(block, block_cb->cb,
+						    block_cb->cb_priv, false,
+						    tcf_block_offload_in_use(block),
+						    NULL);
+			if (!bo->unlocked_driver_cb)
+				block->lockeddevcnt--;
+		}
+		flow_block_cb_free(block_cb);
+	}
+
+	return err;
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo)
+{
+	struct flow_block_cb *block_cb, *next;
+
+	lockdep_assert_held(&block->cb_lock);
+
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		tcf_block_playback_offloads(block, block_cb->cb,
+					    block_cb->cb_priv, false,
+					    tcf_block_offload_in_use(block),
+					    NULL);
+		list_del(&block_cb->list);
+		flow_block_cb_free(block_cb);
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt--;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo)
+{
+	int err;
+
+	switch (bo->command) {
+	case FLOW_BLOCK_BIND:
+		err = tcf_block_bind(block, bo);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = 0;
+		tcf_block_unbind(block, bo);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
 }
 
 /* Main classifier routine: scans classifier chain attached
  * to this qdisc, (optionally) tests for protocol and asks
  * specific classifiers.
  */
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		 struct tcf_result *res, bool compat_mode)
+static inline int __tcf_classify(struct sk_buff *skb,
+				 const struct tcf_proto *tp,
+				 const struct tcf_proto *orig_tp,
+				 struct tcf_result *res,
+				 bool compat_mode,
+				 u32 *last_executed_chain)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	const int max_reclassify_loop = 4;
-	const struct tcf_proto *orig_tp = tp;
+	const int max_reclassify_loop = 16;
 	const struct tcf_proto *first_tp;
 	int limit = 0;
 
@@ -1021,9 +1552,11 @@
 #ifdef CONFIG_NET_CLS_ACT
 	if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
 		first_tp = orig_tp;
+		*last_executed_chain = first_tp->chain->index;
 		goto reset;
 	} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
 		first_tp = res->goto_tp;
+		*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
 		goto reset;
 	}
 #endif
@@ -1046,39 +1579,188 @@
 		goto reclassify;
 #endif
 }
+
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		 struct tcf_result *res, bool compat_mode)
+{
+	u32 last_executed_chain = 0;
+
+	return __tcf_classify(skb, tp, tp, res, compat_mode,
+			      &last_executed_chain);
+}
 EXPORT_SYMBOL(tcf_classify);
+
+int tcf_classify_ingress(struct sk_buff *skb,
+			 const struct tcf_block *ingress_block,
+			 const struct tcf_proto *tp,
+			 struct tcf_result *res, bool compat_mode)
+{
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	u32 last_executed_chain = 0;
+
+	return __tcf_classify(skb, tp, tp, res, compat_mode,
+			      &last_executed_chain);
+#else
+	u32 last_executed_chain = tp ? tp->chain->index : 0;
+	const struct tcf_proto *orig_tp = tp;
+	struct tc_skb_ext *ext;
+	int ret;
+
+	ext = skb_ext_find(skb, TC_SKB_EXT);
+
+	if (ext && ext->chain) {
+		struct tcf_chain *fchain;
+
+		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
+		if (!fchain)
+			return TC_ACT_SHOT;
+
+		/* Consume, so cloned/redirect skbs won't inherit ext */
+		skb_ext_del(skb, TC_SKB_EXT);
+
+		tp = rcu_dereference_bh(fchain->filter_chain);
+		last_executed_chain = fchain->index;
+	}
+
+	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
+			     &last_executed_chain);
+
+	/* If we missed on some chain */
+	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+		ext = tc_skb_ext_alloc(skb);
+		if (WARN_ON_ONCE(!ext))
+			return TC_ACT_SHOT;
+		ext->chain = last_executed_chain;
+		ext->mru = qdisc_skb_cb(skb)->mru;
+	}
+
+	return ret;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify_ingress);
 
 struct tcf_chain_info {
 	struct tcf_proto __rcu **pprev;
 	struct tcf_proto __rcu *next;
 };
 
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info)
 {
-	return rtnl_dereference(*chain_info->pprev);
+	return tcf_chain_dereference(*chain_info->pprev, chain);
 }
 
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
-				struct tcf_chain_info *chain_info,
-				struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+			       struct tcf_chain_info *chain_info,
+			       struct tcf_proto *tp)
 {
+	if (chain->flushing)
+		return -EAGAIN;
+
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
-	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+	tcf_proto_get(tp);
 	rcu_assign_pointer(*chain_info->pprev, tp);
-	tcf_chain_hold(chain);
+
+	return 0;
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
 				struct tcf_chain_info *chain_info,
 				struct tcf_proto *tp)
 {
-	struct tcf_proto *next = rtnl_dereference(chain_info->next);
+	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
+	tcf_proto_mark_delete(tp);
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
-	tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
+					   bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+						    struct tcf_proto *tp_new,
+						    u32 protocol, u32 prio,
+						    bool rtnl_held)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp;
+	int err = 0;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (tcf_proto_exists_destroying(chain, tp_new)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	tp = tcf_chain_tp_find(chain, &chain_info,
+			       protocol, prio, false);
+	if (!tp)
+		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	if (tp) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = tp;
+	} else if (err) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = ERR_PTR(err);
+	}
+
+	return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+				      struct tcf_proto *tp, bool rtnl_held,
+				      struct netlink_ext_ack *extack)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp_iter;
+	struct tcf_proto **pprev;
+	struct tcf_proto *next;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	/* Atomically find and remove tp from chain. */
+	for (pprev = &chain->filter_chain;
+	     (tp_iter = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp_iter->next) {
+		if (tp_iter == tp) {
+			chain_info.pprev = pprev;
+			chain_info.next = tp_iter->next;
+			WARN_ON(tp_iter->deleting);
+			break;
+		}
+	}
+	/* Verify that tp still exists and no new filters were inserted
+	 * concurrently.
+	 * Mark tp for deletion if it is empty.
+	 */
+	if (!tp_iter || !tcf_proto_check_delete(tp)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		return;
+	}
+
+	tcf_proto_signal_destroying(chain, tp);
+	next = tcf_chain_dereference(chain_info.next, chain);
+	if (tp == chain->filter_chain)
+		tcf_chain0_head_change(chain, next);
+	RCU_INIT_POINTER(*chain_info.pprev, next);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	tcf_proto_put(tp, rtnl_held, extack);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1091,7 +1773,8 @@
 
 	/* Check the chain for existence of proto-tcf with this priority */
 	for (pprev = &chain->filter_chain;
-	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+	     (tp = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
 				if (prio_allocate ||
@@ -1104,14 +1787,20 @@
 		}
 	}
 	chain_info->pprev = pprev;
-	chain_info->next = tp ? tp->next : NULL;
+	if (tp) {
+		chain_info->next = tp->next;
+		tcf_proto_get(tp);
+	} else {
+		chain_info->next = NULL;
+	}
 	return tp;
 }
 
 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 			 struct tcf_proto *tp, struct tcf_block *block,
 			 struct Qdisc *q, u32 parent, void *fh,
-			 u32 portid, u32 seq, u16 flags, int event)
+			 u32 portid, u32 seq, u16 flags, int event,
+			 bool terse_dump, bool rtnl_held)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr *nlh;
....@@ -1138,8 +1827,17 @@
11381827 goto nla_put_failure;
11391828 if (!fh) {
11401829 tcm->tcm_handle = 0;
1830
+ } else if (terse_dump) {
1831
+ if (tp->ops->terse_dump) {
1832
+ if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
1833
+ rtnl_held) < 0)
1834
+ goto nla_put_failure;
1835
+ } else {
1836
+ goto cls_op_not_supp;
1837
+ }
11411838 } else {
1142
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
1839
+ if (tp->ops->dump &&
1840
+ tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
11431841 goto nla_put_failure;
11441842 }
11451843 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
....@@ -1147,6 +1845,7 @@
11471845
11481846 out_nlmsg_trim:
11491847 nla_put_failure:
1848
+cls_op_not_supp:
11501849 nlmsg_trim(skb, b);
11511850 return -1;
11521851 }
....@@ -1154,33 +1853,40 @@
11541853 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
11551854 struct nlmsghdr *n, struct tcf_proto *tp,
11561855 struct tcf_block *block, struct Qdisc *q,
1157
- u32 parent, void *fh, int event, bool unicast)
1856
+ u32 parent, void *fh, int event, bool unicast,
1857
+ bool rtnl_held)
11581858 {
11591859 struct sk_buff *skb;
11601860 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1861
+ int err = 0;
11611862
11621863 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
11631864 if (!skb)
11641865 return -ENOBUFS;
11651866
11661867 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1167
- n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
1868
+ n->nlmsg_seq, n->nlmsg_flags, event,
1869
+ false, rtnl_held) <= 0) {
11681870 kfree_skb(skb);
11691871 return -EINVAL;
11701872 }
11711873
11721874 if (unicast)
1173
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1875
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1876
+ else
1877
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1878
+ n->nlmsg_flags & NLM_F_ECHO);
11741879
1175
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1176
- n->nlmsg_flags & NLM_F_ECHO);
1880
+ if (err > 0)
1881
+ err = 0;
1882
+ return err;
11771883 }
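/* Review note: netlink_unicast() and rtnetlink_send() may return a positive
 * value on success, so the err > 0 clamp above normalizes success to 0 and
 * lets only real negative errors propagate to the caller.
 */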
11781884
11791885 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
11801886 struct nlmsghdr *n, struct tcf_proto *tp,
11811887 struct tcf_block *block, struct Qdisc *q,
11821888 u32 parent, void *fh, bool unicast, bool *last,
1183
- struct netlink_ext_ack *extack)
1889
+ bool rtnl_held, struct netlink_ext_ack *extack)
11841890 {
11851891 struct sk_buff *skb;
11861892 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
....@@ -1191,39 +1897,50 @@
11911897 return -ENOBUFS;
11921898
11931899 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1194
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
1900
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1901
+ false, rtnl_held) <= 0) {
11951902 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
11961903 kfree_skb(skb);
11971904 return -EINVAL;
11981905 }
11991906
1200
- err = tp->ops->delete(tp, fh, last, extack);
1907
+ err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
12011908 if (err) {
12021909 kfree_skb(skb);
12031910 return err;
12041911 }
12051912
12061913 if (unicast)
1207
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1208
-
1209
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1210
- n->nlmsg_flags & NLM_F_ECHO);
1914
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1915
+ else
1916
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1917
+ n->nlmsg_flags & NLM_F_ECHO);
12111918 if (err < 0)
12121919 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1920
+
1921
+ if (err > 0)
1922
+ err = 0;
12131923 return err;
12141924 }
12151925
12161926 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
12171927 struct tcf_block *block, struct Qdisc *q,
12181928 u32 parent, struct nlmsghdr *n,
1219
- struct tcf_chain *chain, int event)
1929
+ struct tcf_chain *chain, int event,
1930
+ bool rtnl_held)
12201931 {
12211932 struct tcf_proto *tp;
12221933
1223
- for (tp = rtnl_dereference(chain->filter_chain);
1224
- tp; tp = rtnl_dereference(tp->next))
1934
+ for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1935
+ tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
12251936 tfilter_notify(net, oskb, n, tp, block,
1226
- q, parent, NULL, event, false);
1937
+ q, parent, NULL, event, false, rtnl_held);
1938
+}
1939
+
1940
+static void tfilter_put(struct tcf_proto *tp, void *fh)
1941
+{
1942
+ if (tp->ops->put && fh)
1943
+ tp->ops->put(tp, fh);
12271944 }
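/* Review note: tfilter_put() is the counterpart of tp->ops->get(). With
 * unlocked classifiers a filter handle returned by ->get() may itself be
 * reference-counted, so the new/get paths below release the handle through
 * this helper once they are done with it.
 */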
12281945
12291946 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1231,21 +1948,23 @@
12311948 {
12321949 struct net *net = sock_net(skb->sk);
12331950 struct nlattr *tca[TCA_MAX + 1];
1951
+ char name[IFNAMSIZ];
12341952 struct tcmsg *t;
12351953 u32 protocol;
12361954 u32 prio;
12371955 bool prio_allocate;
12381956 u32 parent;
12391957 u32 chain_index;
1240
- struct Qdisc *q = NULL;
1958
+ struct Qdisc *q;
12411959 struct tcf_chain_info chain_info;
1242
- struct tcf_chain *chain = NULL;
1960
+ struct tcf_chain *chain;
12431961 struct tcf_block *block;
12441962 struct tcf_proto *tp;
12451963 unsigned long cl;
12461964 void *fh;
12471965 int err;
12481966 int tp_created;
1967
+ bool rtnl_held = false;
12491968
12501969 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
12511970 return -EPERM;
....@@ -1253,7 +1972,8 @@
12531972 replay:
12541973 tp_created = 0;
12551974
1256
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1975
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1976
+ rtm_tca_policy, extack);
12571977 if (err < 0)
12581978 return err;
12591979
....@@ -1262,7 +1982,11 @@
12621982 prio = TC_H_MAJ(t->tcm_info);
12631983 prio_allocate = false;
12641984 parent = t->tcm_parent;
1985
+ tp = NULL;
12651986 cl = 0;
1987
+ block = NULL;
1988
+ q = NULL;
1989
+ chain = NULL;
12661990
12671991 if (prio == 0) {
12681992 /* If no priority is provided by the user,
....@@ -1279,12 +2003,38 @@
12792003
12802004 /* Find head of filter chain. */
12812005
1282
- block = tcf_block_find(net, &q, &parent, &cl,
1283
- t->tcm_ifindex, t->tcm_block_index, extack);
2006
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2007
+ if (err)
2008
+ return err;
2009
+
2010
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2011
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2012
+ err = -EINVAL;
2013
+ goto errout;
2014
+ }
2015
+
2016
+ /* Take rtnl mutex if: rtnl_held was set to true on previous iteration,
2017
+ * block is shared (no qdisc found), qdisc is not unlocked, classifier
2018
+ * type is not specified, or classifier is not unlocked.
2019
+ */
2020
+ if (rtnl_held ||
2021
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2022
+ !tcf_proto_is_unlocked(name)) {
2023
+ rtnl_held = true;
2024
+ rtnl_lock();
2025
+ }
2026
+
2027
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2028
+ if (err)
2029
+ goto errout;
2030
+
2031
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2032
+ extack);
12842033 if (IS_ERR(block)) {
12852034 err = PTR_ERR(block);
12862035 goto errout;
12872036 }
2037
+ block->classid = parent;
12882038
12892039 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
12902040 if (chain_index > TC_ACT_EXT_VAL_MASK) {
....@@ -1299,40 +2049,61 @@
12992049 goto errout;
13002050 }
13012051
2052
+ mutex_lock(&chain->filter_chain_lock);
13022053 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
13032054 prio, prio_allocate);
13042055 if (IS_ERR(tp)) {
13052056 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
13062057 err = PTR_ERR(tp);
1307
- goto errout;
2058
+ goto errout_locked;
13082059 }
13092060
13102061 if (tp == NULL) {
2062
+ struct tcf_proto *tp_new = NULL;
2063
+
2064
+ if (chain->flushing) {
2065
+ err = -EAGAIN;
2066
+ goto errout_locked;
2067
+ }
2068
+
13112069 /* Proto-tcf does not exist, create new one */
13122070
13132071 if (tca[TCA_KIND] == NULL || !protocol) {
13142072 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
13152073 err = -EINVAL;
1316
- goto errout;
2074
+ goto errout_locked;
13172075 }
13182076
13192077 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
13202078 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
13212079 err = -ENOENT;
1322
- goto errout;
2080
+ goto errout_locked;
13232081 }
13242082
13252083 if (prio_allocate)
1326
- prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
2084
+ prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2085
+ &chain_info));
13272086
1328
- tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
1329
- protocol, prio, chain, extack);
2087
+ mutex_unlock(&chain->filter_chain_lock);
2088
+ tp_new = tcf_proto_create(name, protocol, prio, chain,
2089
+ rtnl_held, extack);
2090
+ if (IS_ERR(tp_new)) {
2091
+ err = PTR_ERR(tp_new);
2092
+ goto errout_tp;
2093
+ }
2094
+
2095
+ tp_created = 1;
2096
+ tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2097
+ rtnl_held);
13302098 if (IS_ERR(tp)) {
13312099 err = PTR_ERR(tp);
1332
- goto errout;
2100
+ goto errout_tp;
13332101 }
1334
- tp_created = 1;
1335
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2102
+ } else {
2103
+ mutex_unlock(&chain->filter_chain_lock);
2104
+ }
2105
+
2106
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
13362107 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
13372108 err = -EINVAL;
13382109 goto errout;
....@@ -1347,12 +2118,14 @@
13472118 goto errout;
13482119 }
13492120 } else if (n->nlmsg_flags & NLM_F_EXCL) {
2121
+ tfilter_put(tp, fh);
13502122 NL_SET_ERR_MSG(extack, "Filter already exists");
13512123 err = -EEXIST;
13522124 goto errout;
13532125 }
13542126
13552127 if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2128
+ tfilter_put(tp, fh);
13562129 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
13572130 err = -EINVAL;
13582131 goto errout;
....@@ -1360,28 +2133,44 @@
13602133
13612134 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
13622135 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
1363
- extack);
2136
+ rtnl_held, extack);
13642137 if (err == 0) {
1365
- if (tp_created)
1366
- tcf_chain_tp_insert(chain, &chain_info, tp);
13672138 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1368
- RTM_NEWTFILTER, false);
2139
+ RTM_NEWTFILTER, false, rtnl_held);
2140
+ tfilter_put(tp, fh);
13692141 /* q pointer is NULL for shared blocks */
13702142 if (q)
13712143 q->flags &= ~TCQ_F_CAN_BYPASS;
1372
- } else {
1373
- if (tp_created)
1374
- tcf_proto_destroy(tp, NULL);
13752144 }
13762145
13772146 errout:
1378
- if (chain)
1379
- tcf_chain_put(chain);
1380
- tcf_block_release(q, block);
1381
- if (err == -EAGAIN)
2147
+ if (err && tp_created)
2148
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2149
+errout_tp:
2150
+ if (chain) {
2151
+ if (tp && !IS_ERR(tp))
2152
+ tcf_proto_put(tp, rtnl_held, NULL);
2153
+ if (!tp_created)
2154
+ tcf_chain_put(chain);
2155
+ }
2156
+ tcf_block_release(q, block, rtnl_held);
2157
+
2158
+ if (rtnl_held)
2159
+ rtnl_unlock();
2160
+
2161
+ if (err == -EAGAIN) {
2162
+ /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2163
+ * of target chain.
2164
+ */
2165
+ rtnl_held = true;
13822166 /* Replay the request. */
13832167 goto replay;
2168
+ }
13842169 return err;
2170
+
2171
+errout_locked:
2172
+ mutex_unlock(&chain->filter_chain_lock);
2173
+ goto errout;
13852174 }
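/* Review note: tc_new_tfilter() takes rtnl_lock() only on demand. The
 * decision near the top of the function is equivalent to this sketch
 * (a restatement of the existing condition, not new logic):
 *
 *	bool need_rtnl = rtnl_held ||    (forced true on an EAGAIN replay)
 *		(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
 *		!tcf_proto_is_unlocked(name);
 *
 * On -EAGAIN the request is replayed with rtnl held, so the replay cannot
 * livelock against a concurrent chain flush.
 */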
13862175
13872176 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1389,6 +2178,7 @@
13892178 {
13902179 struct net *net = sock_net(skb->sk);
13912180 struct nlattr *tca[TCA_MAX + 1];
2181
+ char name[IFNAMSIZ];
13922182 struct tcmsg *t;
13932183 u32 protocol;
13942184 u32 prio;
....@@ -1397,16 +2187,18 @@
13972187 struct Qdisc *q = NULL;
13982188 struct tcf_chain_info chain_info;
13992189 struct tcf_chain *chain = NULL;
1400
- struct tcf_block *block;
2190
+ struct tcf_block *block = NULL;
14012191 struct tcf_proto *tp = NULL;
14022192 unsigned long cl = 0;
14032193 void *fh = NULL;
14042194 int err;
2195
+ bool rtnl_held = false;
14052196
14062197 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
14072198 return -EPERM;
14082199
1409
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2200
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2201
+ rtm_tca_policy, extack);
14102202 if (err < 0)
14112203 return err;
14122204
....@@ -1422,8 +2214,32 @@
14222214
14232215 /* Find head of filter chain. */
14242216
1425
- block = tcf_block_find(net, &q, &parent, &cl,
1426
- t->tcm_ifindex, t->tcm_block_index, extack);
2217
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2218
+ if (err)
2219
+ return err;
2220
+
2221
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2222
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2223
+ err = -EINVAL;
2224
+ goto errout;
2225
+ }
2226
+ /* Take rtnl mutex if: flushing whole chain, block is shared (no qdisc
2227
+ * found), qdisc is not unlocked, classifier type is not specified,
2228
+ * or classifier is not unlocked.
2229
+ */
2230
+ if (!prio ||
2231
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2232
+ !tcf_proto_is_unlocked(name)) {
2233
+ rtnl_held = true;
2234
+ rtnl_lock();
2235
+ }
2236
+
2237
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2238
+ if (err)
2239
+ goto errout;
2240
+
2241
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2242
+ extack);
14272243 if (IS_ERR(block)) {
14282244 err = PTR_ERR(block);
14292245 goto errout;
....@@ -1451,56 +2267,70 @@
14512267
14522268 if (prio == 0) {
14532269 tfilter_notify_chain(net, skb, block, q, parent, n,
1454
- chain, RTM_DELTFILTER);
1455
- tcf_chain_flush(chain);
2270
+ chain, RTM_DELTFILTER, rtnl_held);
2271
+ tcf_chain_flush(chain, rtnl_held);
14562272 err = 0;
14572273 goto errout;
14582274 }
14592275
2276
+ mutex_lock(&chain->filter_chain_lock);
14602277 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
14612278 prio, false);
14622279 if (!tp || IS_ERR(tp)) {
14632280 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
14642281 err = tp ? PTR_ERR(tp) : -ENOENT;
1465
- goto errout;
2282
+ goto errout_locked;
14662283 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
14672284 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
14682285 err = -EINVAL;
2286
+ goto errout_locked;
2287
+ } else if (t->tcm_handle == 0) {
2288
+ tcf_proto_signal_destroying(chain, tp);
2289
+ tcf_chain_tp_remove(chain, &chain_info, tp);
2290
+ mutex_unlock(&chain->filter_chain_lock);
2291
+
2292
+ tcf_proto_put(tp, rtnl_held, NULL);
2293
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2294
+ RTM_DELTFILTER, false, rtnl_held);
2295
+ err = 0;
14692296 goto errout;
14702297 }
2298
+ mutex_unlock(&chain->filter_chain_lock);
14712299
14722300 fh = tp->ops->get(tp, t->tcm_handle);
14732301
14742302 if (!fh) {
1475
- if (t->tcm_handle == 0) {
1476
- tcf_chain_tp_remove(chain, &chain_info, tp);
1477
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1478
- RTM_DELTFILTER, false);
1479
- tcf_proto_destroy(tp, extack);
1480
- err = 0;
1481
- } else {
1482
- NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1483
- err = -ENOENT;
1484
- }
2303
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2304
+ err = -ENOENT;
14852305 } else {
14862306 bool last;
14872307
14882308 err = tfilter_del_notify(net, skb, n, tp, block,
14892309 q, parent, fh, false, &last,
1490
- extack);
2310
+ rtnl_held, extack);
2311
+
14912312 if (err)
14922313 goto errout;
1493
- if (last) {
1494
- tcf_chain_tp_remove(chain, &chain_info, tp);
1495
- tcf_proto_destroy(tp, extack);
1496
- }
2314
+ if (last)
2315
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
14972316 }
14982317
14992318 errout:
1500
- if (chain)
2319
+ if (chain) {
2320
+ if (tp && !IS_ERR(tp))
2321
+ tcf_proto_put(tp, rtnl_held, NULL);
15012322 tcf_chain_put(chain);
1502
- tcf_block_release(q, block);
2323
+ }
2324
+ tcf_block_release(q, block, rtnl_held);
2325
+
2326
+ if (rtnl_held)
2327
+ rtnl_unlock();
2328
+
15032329 return err;
2330
+
2331
+errout_locked:
2332
+ mutex_unlock(&chain->filter_chain_lock);
2333
+ goto errout;
15042334 }
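/* Review note: in tc_del_tfilter() a zero tcm_handle means "delete the
 * whole proto instance": the tp is signalled as destroying and unlinked
 * under filter_chain_lock before the notification goes out. A non-zero
 * handle deletes a single filter, and only a tp that became empty
 * (last == true) is then torn down via tcf_chain_tp_delete_empty().
 */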
15052335
15062336 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1508,6 +2338,7 @@
15082338 {
15092339 struct net *net = sock_net(skb->sk);
15102340 struct nlattr *tca[TCA_MAX + 1];
2341
+ char name[IFNAMSIZ];
15112342 struct tcmsg *t;
15122343 u32 protocol;
15132344 u32 prio;
....@@ -1516,13 +2347,15 @@
15162347 struct Qdisc *q = NULL;
15172348 struct tcf_chain_info chain_info;
15182349 struct tcf_chain *chain = NULL;
1519
- struct tcf_block *block;
2350
+ struct tcf_block *block = NULL;
15202351 struct tcf_proto *tp = NULL;
15212352 unsigned long cl = 0;
15222353 void *fh = NULL;
15232354 int err;
2355
+ bool rtnl_held = false;
15242356
1525
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2357
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2358
+ rtm_tca_policy, extack);
15262359 if (err < 0)
15272360 return err;
15282361
....@@ -1538,8 +2371,31 @@
15382371
15392372 /* Find head of filter chain. */
15402373
1541
- block = tcf_block_find(net, &q, &parent, &cl,
1542
- t->tcm_ifindex, t->tcm_block_index, extack);
2374
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2375
+ if (err)
2376
+ return err;
2377
+
2378
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2379
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2380
+ err = -EINVAL;
2381
+ goto errout;
2382
+ }
2383
+ /* Take rtnl mutex if: block is shared (no qdisc found), qdisc is not
2384
+ * unlocked, classifier type is not specified, or classifier is not
2385
+ * unlocked.
2386
+ */
2387
+ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2388
+ !tcf_proto_is_unlocked(name)) {
2389
+ rtnl_held = true;
2390
+ rtnl_lock();
2391
+ }
2392
+
2393
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2394
+ if (err)
2395
+ goto errout;
2396
+
2397
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2398
+ extack);
15432399 if (IS_ERR(block)) {
15442400 err = PTR_ERR(block);
15452401 goto errout;
....@@ -1558,8 +2414,10 @@
15582414 goto errout;
15592415 }
15602416
2417
+ mutex_lock(&chain->filter_chain_lock);
15612418 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
15622419 prio, false);
2420
+ mutex_unlock(&chain->filter_chain_lock);
15632421 if (!tp || IS_ERR(tp)) {
15642422 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
15652423 err = tp ? PTR_ERR(tp) : -ENOENT;
....@@ -1577,15 +2435,23 @@
15772435 err = -ENOENT;
15782436 } else {
15792437 err = tfilter_notify(net, skb, n, tp, block, q, parent,
1580
- fh, RTM_NEWTFILTER, true);
2438
+ fh, RTM_NEWTFILTER, true, rtnl_held);
15812439 if (err < 0)
15822440 NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
15832441 }
15842442
2443
+ tfilter_put(tp, fh);
15852444 errout:
1586
- if (chain)
2445
+ if (chain) {
2446
+ if (tp && !IS_ERR(tp))
2447
+ tcf_proto_put(tp, rtnl_held, NULL);
15872448 tcf_chain_put(chain);
1588
- tcf_block_release(q, block);
2449
+ }
2450
+ tcf_block_release(q, block, rtnl_held);
2451
+
2452
+ if (rtnl_held)
2453
+ rtnl_unlock();
2454
+
15892455 return err;
15902456 }
15912457
....@@ -1596,6 +2462,7 @@
15962462 struct tcf_block *block;
15972463 struct Qdisc *q;
15982464 u32 parent;
2465
+ bool terse_dump;
15992466 };
16002467
16012468 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
....@@ -1606,21 +2473,25 @@
16062473 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
16072474 n, NETLINK_CB(a->cb->skb).portid,
16082475 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1609
- RTM_NEWTFILTER);
2476
+ RTM_NEWTFILTER, a->terse_dump, true);
16102477 }
16112478
16122479 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
16132480 struct sk_buff *skb, struct netlink_callback *cb,
1614
- long index_start, long *p_index)
2481
+ long index_start, long *p_index, bool terse)
16152482 {
16162483 struct net *net = sock_net(skb->sk);
16172484 struct tcf_block *block = chain->block;
16182485 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2486
+ struct tcf_proto *tp, *tp_prev;
16192487 struct tcf_dump_args arg;
1620
- struct tcf_proto *tp;
16212488
1622
- for (tp = rtnl_dereference(chain->filter_chain);
1623
- tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
2489
+ for (tp = __tcf_get_next_proto(chain, NULL);
2490
+ tp;
2491
+ tp_prev = tp,
2492
+ tp = __tcf_get_next_proto(chain, tp),
2493
+ tcf_proto_put(tp_prev, true, NULL),
2494
+ (*p_index)++) {
16242495 if (*p_index < index_start)
16252496 continue;
16262497 if (TC_H_MAJ(tcm->tcm_info) &&
....@@ -1636,9 +2507,8 @@
16362507 if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
16372508 NETLINK_CB(cb->skb).portid,
16382509 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1639
- RTM_NEWTFILTER) <= 0)
1640
- return false;
1641
-
2510
+ RTM_NEWTFILTER, false, true) <= 0)
2511
+ goto errout;
16422512 cb->args[1] = 1;
16432513 }
16442514 if (!tp->ops->walk)
....@@ -1653,24 +2523,34 @@
16532523 arg.w.skip = cb->args[1] - 1;
16542524 arg.w.count = 0;
16552525 arg.w.cookie = cb->args[2];
1656
- tp->ops->walk(tp, &arg.w);
2526
+ arg.terse_dump = terse;
2527
+ tp->ops->walk(tp, &arg.w, true);
16572528 cb->args[2] = arg.w.cookie;
16582529 cb->args[1] = arg.w.count + 1;
16592530 if (arg.w.stop)
1660
- return false;
2531
+ goto errout;
16612532 }
16622533 return true;
2534
+
2535
+errout:
2536
+ tcf_proto_put(tp, true, NULL);
2537
+ return false;
16632538 }
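/* Review note: the proto walk uses __tcf_get_next_proto(), which returns
 * each tp referenced; the for-loop increment puts the previous tp, and the
 * errout label puts the current one when the dump aborts early, so no
 * reference is leaked on the -EMSGSIZE path.
 */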
2539
+
2540
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
2541
+ [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
2542
+};
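/* Review note: TCA_DUMP_FLAGS is validated as a strict bitfield32; only
 * TCA_DUMP_FLAGS_TERSE may be set. A dump requester would attach the flag
 * roughly like this (hypothetical userspace sketch using iproute2's
 * addattr_l() helper; not part of this change):
 *
 *	struct nla_bitfield32 flags = {
 *		.value    = TCA_DUMP_FLAGS_TERSE,
 *		.selector = TCA_DUMP_FLAGS_TERSE,
 *	};
 *	addattr_l(&req.n, sizeof(req), TCA_DUMP_FLAGS, &flags, sizeof(flags));
 *
 * Any other selected bit fails policy validation before the dump runs.
 */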
16642543
16652544 /* called with RTNL */
16662545 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
16672546 {
2547
+ struct tcf_chain *chain, *chain_prev;
16682548 struct net *net = sock_net(skb->sk);
16692549 struct nlattr *tca[TCA_MAX + 1];
16702550 struct Qdisc *q = NULL;
16712551 struct tcf_block *block;
1672
- struct tcf_chain *chain;
16732552 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2553
+ bool terse_dump = false;
16742554 long index_start;
16752555 long index;
16762556 u32 parent;
....@@ -1679,12 +2559,20 @@
16792559 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
16802560 return skb->len;
16812561
1682
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
2562
+ err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2563
+ tcf_tfilter_dump_policy, cb->extack);
16832564 if (err)
16842565 return err;
16852566
2567
+ if (tca[TCA_DUMP_FLAGS]) {
2568
+ struct nla_bitfield32 flags =
2569
+ nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
2570
+
2571
+ terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
2572
+ }
2573
+
16862574 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1687
- block = tcf_block_lookup(net, tcm->tcm_block_index);
2575
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
16882576 if (!block)
16892577 goto out;
16902578 /* If we work with block index, q is NULL and parent value
....@@ -1704,12 +2592,10 @@
17042592 return skb->len;
17052593
17062594 parent = tcm->tcm_parent;
1707
- if (!parent) {
1708
- q = dev->qdisc;
1709
- parent = q->handle;
1710
- } else {
2595
+ if (!parent)
2596
+ q = rtnl_dereference(dev->qdisc);
2597
+ else
17112598 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1712
- }
17132599 if (!q)
17142600 goto out;
17152601 cops = q->ops->cl_ops;
....@@ -1725,6 +2611,7 @@
17252611 block = cops->tcf_block(q, cl, NULL);
17262612 if (!block)
17272613 goto out;
2614
+ parent = block->classid;
17282615 if (tcf_block_shared(block))
17292616 q = NULL;
17302617 }
....@@ -1732,17 +2619,24 @@
17322619 index_start = cb->args[0];
17332620 index = 0;
17342621
1735
- list_for_each_entry(chain, &block->chain_list, list) {
2622
+ for (chain = __tcf_get_next_chain(block, NULL);
2623
+ chain;
2624
+ chain_prev = chain,
2625
+ chain = __tcf_get_next_chain(block, chain),
2626
+ tcf_chain_put(chain_prev)) {
17362627 if (tca[TCA_CHAIN] &&
17372628 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
17382629 continue;
17392630 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1740
- index_start, &index)) {
2631
+ index_start, &index, terse_dump)) {
2632
+ tcf_chain_put(chain);
17412633 err = -EMSGSIZE;
17422634 break;
17432635 }
17442636 }
17452637
2638
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2639
+ tcf_block_refcnt_put(block, true);
17462640 cb->args[0] = index;
17472641
17482642 out:
....@@ -1752,8 +2646,10 @@
17522646 return skb->len;
17532647 }
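/* Review note: the chain walk mirrors the proto walk: __tcf_get_next_chain()
 * returns each chain referenced, the loop increment releases the previous
 * one, and the early-exit path drops the current chain before breaking.
 * The block reference taken via tcf_block_refcnt_get() is released once the
 * dump is done.
 */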
17542648
1755
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
1756
- struct sk_buff *skb, struct tcf_block *block,
2649
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2650
+ void *tmplt_priv, u32 chain_index,
2651
+ struct net *net, struct sk_buff *skb,
2652
+ struct tcf_block *block,
17572653 u32 portid, u32 seq, u16 flags, int event)
17582654 {
17592655 unsigned char *b = skb_tail_pointer(skb);
....@@ -1762,8 +2658,8 @@
17622658 struct tcmsg *tcm;
17632659 void *priv;
17642660
1765
- ops = chain->tmplt_ops;
1766
- priv = chain->tmplt_priv;
2661
+ ops = tmplt_ops;
2662
+ priv = tmplt_priv;
17672663
17682664 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
17692665 if (!nlh)
....@@ -1781,7 +2677,7 @@
17812677 tcm->tcm_block_index = block->index;
17822678 }
17832679
1784
- if (nla_put_u32(skb, TCA_CHAIN, chain->index))
2680
+ if (nla_put_u32(skb, TCA_CHAIN, chain_index))
17852681 goto nla_put_failure;
17862682
17872683 if (ops) {
....@@ -1807,13 +2703,45 @@
18072703 struct tcf_block *block = chain->block;
18082704 struct net *net = block->net;
18092705 struct sk_buff *skb;
2706
+ int err = 0;
18102707
18112708 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
18122709 if (!skb)
18132710 return -ENOBUFS;
18142711
1815
- if (tc_chain_fill_node(chain, net, skb, block, portid,
2712
+ if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2713
+ chain->index, net, skb, block, portid,
18162714 seq, flags, event) <= 0) {
2715
+ kfree_skb(skb);
2716
+ return -EINVAL;
2717
+ }
2718
+
2719
+ if (unicast)
2720
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2721
+ else
2722
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2723
+ flags & NLM_F_ECHO);
2724
+
2725
+ if (err > 0)
2726
+ err = 0;
2727
+ return err;
2728
+}
2729
+
2730
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2731
+ void *tmplt_priv, u32 chain_index,
2732
+ struct tcf_block *block, struct sk_buff *oskb,
2733
+ u32 seq, u16 flags, bool unicast)
2734
+{
2735
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2736
+ struct net *net = block->net;
2737
+ struct sk_buff *skb;
2738
+
2739
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2740
+ if (!skb)
2741
+ return -ENOBUFS;
2742
+
2743
+ if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2744
+ block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
18172745 kfree_skb(skb);
18182746 return -EINVAL;
18192747 }
....@@ -1829,17 +2757,24 @@
18292757 struct netlink_ext_ack *extack)
18302758 {
18312759 const struct tcf_proto_ops *ops;
2760
+ char name[IFNAMSIZ];
18322761 void *tmplt_priv;
18332762
18342763 /* If kind is not set, user did not specify template. */
18352764 if (!tca[TCA_KIND])
18362765 return 0;
18372766
1838
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
2767
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2768
+ NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2769
+ return -EINVAL;
2770
+ }
2771
+
2772
+ ops = tcf_proto_lookup_ops(name, true, extack);
18392773 if (IS_ERR(ops))
18402774 return PTR_ERR(ops);
18412775 if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
18422776 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2777
+ module_put(ops->owner);
18432778 return -EOPNOTSUPP;
18442779 }
18452780
....@@ -1853,16 +2788,15 @@
18532788 return 0;
18542789 }
18552790
1856
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
2791
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2792
+ void *tmplt_priv)
18572793 {
1858
- const struct tcf_proto_ops *ops = chain->tmplt_ops;
1859
-
18602794 /* If template ops are set, no work to do for us. */
1861
- if (!ops)
2795
+ if (!tmplt_ops)
18622796 return;
18632797
1864
- ops->tmplt_destroy(chain->tmplt_priv);
1865
- module_put(ops->owner);
2798
+ tmplt_ops->tmplt_destroy(tmplt_priv);
2799
+ module_put(tmplt_ops->owner);
18662800 }
18672801
18682802 /* Add/delete/get a chain */
....@@ -1875,8 +2809,8 @@
18752809 struct tcmsg *t;
18762810 u32 parent;
18772811 u32 chain_index;
1878
- struct Qdisc *q = NULL;
1879
- struct tcf_chain *chain = NULL;
2812
+ struct Qdisc *q;
2813
+ struct tcf_chain *chain;
18802814 struct tcf_block *block;
18812815 unsigned long cl;
18822816 int err;
....@@ -1886,7 +2820,9 @@
18862820 return -EPERM;
18872821
18882822 replay:
1889
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2823
+ q = NULL;
2824
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2825
+ rtm_tca_policy, extack);
18902826 if (err < 0)
18912827 return err;
18922828
....@@ -1905,6 +2841,8 @@
19052841 err = -EINVAL;
19062842 goto errout_block;
19072843 }
2844
+
2845
+ mutex_lock(&block->lock);
19082846 chain = tcf_chain_lookup(block, chain_index);
19092847 if (n->nlmsg_type == RTM_NEWCHAIN) {
19102848 if (chain) {
....@@ -1916,54 +2854,61 @@
19162854 } else {
19172855 NL_SET_ERR_MSG(extack, "Filter chain already exists");
19182856 err = -EEXIST;
1919
- goto errout_block;
2857
+ goto errout_block_locked;
19202858 }
19212859 } else {
19222860 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
19232861 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
19242862 err = -ENOENT;
1925
- goto errout_block;
2863
+ goto errout_block_locked;
19262864 }
19272865 chain = tcf_chain_create(block, chain_index);
19282866 if (!chain) {
19292867 NL_SET_ERR_MSG(extack, "Failed to create filter chain");
19302868 err = -ENOMEM;
1931
- goto errout_block;
2869
+ goto errout_block_locked;
19322870 }
19332871 }
19342872 } else {
19352873 if (!chain || tcf_chain_held_by_acts_only(chain)) {
19362874 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
19372875 err = -EINVAL;
1938
- goto errout_block;
2876
+ goto errout_block_locked;
19392877 }
19402878 tcf_chain_hold(chain);
19412879 }
19422880
1943
- switch (n->nlmsg_type) {
1944
- case RTM_NEWCHAIN:
1945
- err = tc_chain_tmplt_add(chain, net, tca, extack);
1946
- if (err)
1947
- goto errout;
1948
- /* In case the chain was successfully added, take a reference
1949
- * to the chain. This ensures that an empty chain
1950
- * does not disappear at the end of this function.
2881
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
2882
+ /* Modifying chain requires holding parent block lock. In case
2883
+ * the chain was successfully added, take a reference to the
2884
+ * chain. This ensures that an empty chain does not disappear at
2885
+ * the end of this function.
19512886 */
19522887 tcf_chain_hold(chain);
19532888 chain->explicitly_created = true;
2889
+ }
2890
+ mutex_unlock(&block->lock);
2891
+
2892
+ switch (n->nlmsg_type) {
2893
+ case RTM_NEWCHAIN:
2894
+ err = tc_chain_tmplt_add(chain, net, tca, extack);
2895
+ if (err) {
2896
+ tcf_chain_put_explicitly_created(chain);
2897
+ goto errout;
2898
+ }
2899
+
19542900 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
19552901 RTM_NEWCHAIN, false);
19562902 break;
19572903 case RTM_DELCHAIN:
19582904 tfilter_notify_chain(net, skb, block, q, parent, n,
1959
- chain, RTM_DELTFILTER);
2905
+ chain, RTM_DELTFILTER, true);
19602906 /* Flush the chain first as the user requested chain removal. */
1961
- tcf_chain_flush(chain);
2907
+ tcf_chain_flush(chain, true);
19622908 /* In case the chain was successfully deleted, put a reference
19632909 * to the chain previously taken during addition.
19642910 */
19652911 tcf_chain_put_explicitly_created(chain);
1966
- chain->explicitly_created = false;
19672912 break;
19682913 case RTM_GETCHAIN:
19692914 err = tc_chain_notify(chain, skb, n->nlmsg_seq,
....@@ -1980,11 +2925,15 @@
19802925 errout:
19812926 tcf_chain_put(chain);
19822927 errout_block:
1983
- tcf_block_release(q, block);
2928
+ tcf_block_release(q, block, true);
19842929 if (err == -EAGAIN)
19852930 /* Replay the request. */
19862931 goto replay;
19872932 return err;
2933
+
2934
+errout_block_locked:
2935
+ mutex_unlock(&block->lock);
2936
+ goto errout_block;
19882937 }
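/* Review note: chain lookup/creation in tc_ctl_chain() is now guarded by
 * block->lock so RTM_NEWCHAIN cannot race with another chain add/remove on
 * the same block. The lock is dropped before template setup and
 * notifications, which may sleep; the extra tcf_chain_hold() taken for
 * RTM_NEWCHAIN keeps the still-empty chain alive across that window.
 */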
19892938
19902939 /* called with RTNL */
....@@ -1994,8 +2943,8 @@
19942943 struct nlattr *tca[TCA_MAX + 1];
19952944 struct Qdisc *q = NULL;
19962945 struct tcf_block *block;
1997
- struct tcf_chain *chain;
19982946 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2947
+ struct tcf_chain *chain;
19992948 long index_start;
20002949 long index;
20012950 u32 parent;
....@@ -2004,13 +2953,13 @@
20042953 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
20052954 return skb->len;
20062955
2007
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
2008
- NULL);
2956
+ err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2957
+ rtm_tca_policy, cb->extack);
20092958 if (err)
20102959 return err;
20112960
20122961 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2013
- block = tcf_block_lookup(net, tcm->tcm_block_index);
2962
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
20142963 if (!block)
20152964 goto out;
20162965 /* If we work with block index, q is NULL and parent value
....@@ -2031,7 +2980,7 @@
20312980
20322981 parent = tcm->tcm_parent;
20332982 if (!parent) {
2034
- q = dev->qdisc;
2983
+ q = rtnl_dereference(dev->qdisc);
20352984 parent = q->handle;
20362985 } else {
20372986 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
....@@ -2058,6 +3007,7 @@
20583007 index_start = cb->args[0];
20593008 index = 0;
20603009
3010
+ mutex_lock(&block->lock);
20613011 list_for_each_entry(chain, &block->chain_list, list) {
20623012 if ((tca[TCA_CHAIN] &&
20633013 nla_get_u32(tca[TCA_CHAIN]) != chain->index))
....@@ -2068,7 +3018,8 @@
20683018 }
20693019 if (tcf_chain_held_by_acts_only(chain))
20703020 continue;
2071
- err = tc_chain_fill_node(chain, net, skb, block,
3021
+ err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
3022
+ chain->index, net, skb, block,
20723023 NETLINK_CB(cb->skb).portid,
20733024 cb->nlh->nlmsg_seq, NLM_F_MULTI,
20743025 RTM_NEWCHAIN);
....@@ -2076,7 +3027,10 @@
20763027 break;
20773028 index++;
20783029 }
3030
+ mutex_unlock(&block->lock);
20793031
3032
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3033
+ tcf_block_refcnt_put(block, true);
20803034 cb->args[0] = index;
20813035
20823036 out:
....@@ -2100,35 +3054,43 @@
21003054
21013055 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
21023056 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
2103
- struct netlink_ext_ack *extack)
3057
+ bool rtnl_held, struct netlink_ext_ack *extack)
21043058 {
21053059 #ifdef CONFIG_NET_CLS_ACT
21063060 {
3061
+ int init_res[TCA_ACT_MAX_PRIO] = {};
21073062 struct tc_action *act;
21083063 size_t attr_size = 0;
21093064
21103065 if (exts->police && tb[exts->police]) {
3066
+ struct tc_action_ops *a_o;
3067
+
3068
+ a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
3069
+ if (IS_ERR(a_o))
3070
+ return PTR_ERR(a_o);
21113071 act = tcf_action_init_1(net, tp, tb[exts->police],
21123072 rate_tlv, "police", ovr,
2113
- TCA_ACT_BIND, true, extack);
3073
+ TCA_ACT_BIND, a_o, init_res,
3074
+ rtnl_held, extack);
3075
+ module_put(a_o->owner);
21143076 if (IS_ERR(act))
21153077 return PTR_ERR(act);
21163078
21173079 act->type = exts->type = TCA_OLD_COMPAT;
21183080 exts->actions[0] = act;
21193081 exts->nr_actions = 1;
3082
+ tcf_idr_insert_many(exts->actions);
21203083 } else if (exts->action && tb[exts->action]) {
21213084 int err;
21223085
21233086 err = tcf_action_init(net, tp, tb[exts->action],
21243087 rate_tlv, NULL, ovr, TCA_ACT_BIND,
2125
- exts->actions, &attr_size, true,
2126
- extack);
3088
+ exts->actions, init_res,
3089
+ &attr_size, rtnl_held, extack);
21273090 if (err < 0)
21283091 return err;
21293092 exts->nr_actions = err;
21303093 }
2131
- exts->net = net;
21323094 }
21333095 #else
21343096 if ((exts->action && tb[exts->action]) ||
....@@ -2175,16 +3137,17 @@
21753137 * tc data even if iproute2 was newer - jhs
21763138 */
21773139 if (exts->type != TCA_OLD_COMPAT) {
2178
- nest = nla_nest_start(skb, exts->action);
3140
+ nest = nla_nest_start_noflag(skb, exts->action);
21793141 if (nest == NULL)
21803142 goto nla_put_failure;
21813143
2182
- if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
3144
+ if (tcf_action_dump(skb, exts->actions, 0, 0, false)
3145
+ < 0)
21833146 goto nla_put_failure;
21843147 nla_nest_end(skb, nest);
21853148 } else if (exts->police) {
21863149 struct tc_action *act = tcf_exts_first_act(exts);
2187
- nest = nla_nest_start(skb, exts->police);
3150
+ nest = nla_nest_start_noflag(skb, exts->police);
21883151 if (nest == NULL || !act)
21893152 goto nla_put_failure;
21903153 if (tcf_action_dump_old(skb, act, 0, 0) < 0)
....@@ -2203,6 +3166,31 @@
22033166 }
22043167 EXPORT_SYMBOL(tcf_exts_dump);
22053168
3169
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
3170
+{
3171
+#ifdef CONFIG_NET_CLS_ACT
3172
+ struct nlattr *nest;
3173
+
3174
+ if (!exts->action || !tcf_exts_has_actions(exts))
3175
+ return 0;
3176
+
3177
+ nest = nla_nest_start_noflag(skb, exts->action);
3178
+ if (!nest)
3179
+ goto nla_put_failure;
3180
+
3181
+ if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
3182
+ goto nla_put_failure;
3183
+ nla_nest_end(skb, nest);
3184
+ return 0;
3185
+
3186
+nla_put_failure:
3187
+ nla_nest_cancel(skb, nest);
3188
+ return -1;
3189
+#else
3190
+ return 0;
3191
+#endif
3192
+}
3193
+EXPORT_SYMBOL(tcf_exts_terse_dump);
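/* Review note: the terse variant emits only the actions nest, with
 * tcf_action_dump() in terse mode, and skips everything else a full
 * tcf_exts_dump() would produce; this is what backs the
 * TCA_DUMP_FLAGS_TERSE filter dump mode handled earlier in this file.
 */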
22063194
22073195 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
22083196 {
....@@ -2215,62 +3203,687 @@
22153203 }
22163204 EXPORT_SYMBOL(tcf_exts_dump_stats);
22173205
2218
-static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
2219
- enum tc_setup_type type,
2220
- void *type_data, bool err_stop)
3206
+static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
22213207 {
2222
- int ok_count = 0;
2223
-#ifdef CONFIG_NET_CLS_ACT
2224
- const struct tc_action *a;
2225
- struct net_device *dev;
2226
- int i, ret;
3208
+ if (*flags & TCA_CLS_FLAGS_IN_HW)
3209
+ return;
3210
+ *flags |= TCA_CLS_FLAGS_IN_HW;
3211
+ atomic_inc(&block->offloadcnt);
3212
+}
22273213
2228
- if (!tcf_exts_has_actions(exts))
2229
- return 0;
3214
+static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3215
+{
3216
+ if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3217
+ return;
3218
+ *flags &= ~TCA_CLS_FLAGS_IN_HW;
3219
+ atomic_dec(&block->offloadcnt);
3220
+}
22303221
2231
- for (i = 0; i < exts->nr_actions; i++) {
2232
- a = exts->actions[i];
2233
- if (!a->ops->get_dev)
2234
- continue;
2235
- dev = a->ops->get_dev(a);
2236
- if (!dev)
2237
- continue;
2238
- ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
2239
- a->ops->put_dev(dev);
2240
- if (ret < 0)
2241
- return ret;
2242
- ok_count += ret;
3222
+static void tc_cls_offload_cnt_update(struct tcf_block *block,
3223
+ struct tcf_proto *tp, u32 *cnt,
3224
+ u32 *flags, u32 diff, bool add)
3225
+{
3226
+ lockdep_assert_held(&block->cb_lock);
3227
+
3228
+ spin_lock(&tp->lock);
3229
+ if (add) {
3230
+ if (!*cnt)
3231
+ tcf_block_offload_inc(block, flags);
3232
+ *cnt += diff;
3233
+ } else {
3234
+ *cnt -= diff;
3235
+ if (!*cnt)
3236
+ tcf_block_offload_dec(block, flags);
22433237 }
2244
-#endif
3238
+ spin_unlock(&tp->lock);
3239
+}
3240
+
3241
+static void
3242
+tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3243
+ u32 *cnt, u32 *flags)
3244
+{
3245
+ lockdep_assert_held(&block->cb_lock);
3246
+
3247
+ spin_lock(&tp->lock);
3248
+ tcf_block_offload_dec(block, flags);
3249
+ *cnt = 0;
3250
+ spin_unlock(&tp->lock);
3251
+}
3252
+
3253
+static int
3254
+__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3255
+ void *type_data, bool err_stop)
3256
+{
3257
+ struct flow_block_cb *block_cb;
3258
+ int ok_count = 0;
3259
+ int err;
3260
+
3261
+ list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3262
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
3263
+ if (err) {
3264
+ if (err_stop)
3265
+ return err;
3266
+ } else {
3267
+ ok_count++;
3268
+ }
3269
+ }
22453270 return ok_count;
22463271 }
22473272
2248
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
2249
- enum tc_setup_type type, void *type_data, bool err_stop)
3273
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3274
+ void *type_data, bool err_stop, bool rtnl_held)
22503275 {
3276
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
22513277 int ok_count;
2252
- int ret;
22533278
2254
- ret = tcf_block_cb_call(block, type, type_data, err_stop);
2255
- if (ret < 0)
2256
- return ret;
2257
- ok_count = ret;
3279
+retry:
3280
+ if (take_rtnl)
3281
+ rtnl_lock();
3282
+ down_read(&block->cb_lock);
3283
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3284
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3285
+ * obtain the locks in the same order here.
3286
+ */
3287
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3288
+ up_read(&block->cb_lock);
3289
+ take_rtnl = true;
3290
+ goto retry;
3291
+ }
22583292
2259
- if (!exts || ok_count)
2260
- return ok_count;
2261
- ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
2262
- if (ret < 0)
2263
- return ret;
2264
- ok_count += ret;
3293
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
22653294
3295
+ up_read(&block->cb_lock);
3296
+ if (take_rtnl)
3297
+ rtnl_unlock();
22663298 return ok_count;
22673299 }
22683300 EXPORT_SYMBOL(tc_setup_cb_call);
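/* Review note: the retry dance above exists purely for lock ordering. Block
 * bind takes rtnl and then cb_lock, so a caller that sees lockeddevcnt != 0
 * while holding only cb_lock must back out, take rtnl first and re-enter;
 * the READ_ONCE() makes the initial unlocked peek at lockeddevcnt explicit.
 */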
3301
+
3302
+/* Non-destructive filter add. If a filter that wasn't already in hardware is
3303
+ * successfully offloaded, increment the block offload counter. On failure, a
3304
+ * previously offloaded filter is considered to be intact and the offload
3305
+ * counter is not decremented.
3306
+ */
3307
+
3308
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3309
+ enum tc_setup_type type, void *type_data, bool err_stop,
3310
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3311
+{
3312
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3313
+ int ok_count;
3314
+
3315
+retry:
3316
+ if (take_rtnl)
3317
+ rtnl_lock();
3318
+ down_read(&block->cb_lock);
3319
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3320
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3321
+ * obtain the locks in the same order here.
3322
+ */
3323
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3324
+ up_read(&block->cb_lock);
3325
+ take_rtnl = true;
3326
+ goto retry;
3327
+ }
3328
+
3329
+ /* Make sure all netdevs sharing this block are offload-capable. */
3330
+ if (block->nooffloaddevcnt && err_stop) {
3331
+ ok_count = -EOPNOTSUPP;
3332
+ goto err_unlock;
3333
+ }
3334
+
3335
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3336
+ if (ok_count < 0)
3337
+ goto err_unlock;
3338
+
3339
+ if (tp->ops->hw_add)
3340
+ tp->ops->hw_add(tp, type_data);
3341
+ if (ok_count > 0)
3342
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3343
+ ok_count, true);
3344
+err_unlock:
3345
+ up_read(&block->cb_lock);
3346
+ if (take_rtnl)
3347
+ rtnl_unlock();
3348
+ return ok_count < 0 ? ok_count : 0;
3349
+}
3350
+EXPORT_SYMBOL(tc_setup_cb_add);
3351
+
3352
+/* Destructive filter replace. If a filter that wasn't already in hardware is
3353
+ * successfully offloaded, increment the block offload counter. On failure, a
3354
+ * previously offloaded filter is considered to be destroyed and the offload
3355
+ * counter is decremented.
3356
+ */
3357
+
3358
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3359
+ enum tc_setup_type type, void *type_data, bool err_stop,
3360
+ u32 *old_flags, unsigned int *old_in_hw_count,
3361
+ u32 *new_flags, unsigned int *new_in_hw_count,
3362
+ bool rtnl_held)
3363
+{
3364
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3365
+ int ok_count;
3366
+
3367
+retry:
3368
+ if (take_rtnl)
3369
+ rtnl_lock();
3370
+ down_read(&block->cb_lock);
3371
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3372
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3373
+ * obtain the locks in the same order here.
3374
+ */
3375
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3376
+ up_read(&block->cb_lock);
3377
+ take_rtnl = true;
3378
+ goto retry;
3379
+ }
3380
+
3381
+ /* Make sure all netdevs sharing this block are offload-capable. */
3382
+ if (block->nooffloaddevcnt && err_stop) {
3383
+ ok_count = -EOPNOTSUPP;
3384
+ goto err_unlock;
3385
+ }
3386
+
3387
+ tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3388
+ if (tp->ops->hw_del)
3389
+ tp->ops->hw_del(tp, type_data);
3390
+
3391
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3392
+ if (ok_count < 0)
3393
+ goto err_unlock;
3394
+
3395
+ if (tp->ops->hw_add)
3396
+ tp->ops->hw_add(tp, type_data);
3397
+ if (ok_count > 0)
3398
+ tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3399
+ new_flags, ok_count, true);
3400
+err_unlock:
3401
+ up_read(&block->cb_lock);
3402
+ if (take_rtnl)
3403
+ rtnl_unlock();
3404
+ return ok_count < 0 ? ok_count : 0;
3405
+}
3406
+EXPORT_SYMBOL(tc_setup_cb_replace);
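/* Review note: the three offload helpers differ only in counter policy.
 * tc_setup_cb_add() leaves previous offload state intact on failure,
 * tc_setup_cb_replace() above resets the old counters (and calls ->hw_del)
 * before offloading the new state, and tc_setup_cb_destroy() below resets
 * the counters unconditionally. All of them fold a positive ok_count into 0
 * for the caller.
 */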
3407
+
3408
+/* Destroy filter and decrement the block offload counter if the filter was
3409
+ * previously offloaded.
3410
+ */
3411
+
3412
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3413
+ enum tc_setup_type type, void *type_data, bool err_stop,
3414
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3415
+{
3416
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3417
+ int ok_count;
3418
+
3419
+retry:
3420
+ if (take_rtnl)
3421
+ rtnl_lock();
3422
+ down_read(&block->cb_lock);
3423
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3424
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3425
+ * obtain the locks in the same order here.
3426
+ */
3427
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3428
+ up_read(&block->cb_lock);
3429
+ take_rtnl = true;
3430
+ goto retry;
3431
+ }
3432
+
3433
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3434
+
3435
+ tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3436
+ if (tp->ops->hw_del)
3437
+ tp->ops->hw_del(tp, type_data);
3438
+
3439
+ up_read(&block->cb_lock);
3440
+ if (take_rtnl)
3441
+ rtnl_unlock();
3442
+ return ok_count < 0 ? ok_count : 0;
3443
+}
3444
+EXPORT_SYMBOL(tc_setup_cb_destroy);
3445
+
3446
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3447
+ bool add, flow_setup_cb_t *cb,
3448
+ enum tc_setup_type type, void *type_data,
3449
+ void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3450
+{
3451
+ int err = cb(type, type_data, cb_priv);
3452
+
3453
+ if (err) {
3454
+ if (add && tc_skip_sw(*flags))
3455
+ return err;
3456
+ } else {
3457
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3458
+ add);
3459
+ }
3460
+
3461
+ return 0;
3462
+}
3463
+EXPORT_SYMBOL(tc_setup_cb_reoffload);
3464
+
3465
+static int tcf_act_get_cookie(struct flow_action_entry *entry,
3466
+ const struct tc_action *act)
3467
+{
3468
+ struct tc_cookie *cookie;
3469
+ int err = 0;
3470
+
3471
+ rcu_read_lock();
3472
+ cookie = rcu_dereference(act->act_cookie);
3473
+ if (cookie) {
3474
+ entry->cookie = flow_action_cookie_create(cookie->data,
3475
+ cookie->len,
3476
+ GFP_ATOMIC);
3477
+ if (!entry->cookie)
3478
+ err = -ENOMEM;
3479
+ }
3480
+ rcu_read_unlock();
3481
+ return err;
3482
+}
3483
+
3484
+static void tcf_act_put_cookie(struct flow_action_entry *entry)
3485
+{
3486
+ flow_action_cookie_destroy(entry->cookie);
3487
+}
3488
+
3489
+void tc_cleanup_flow_action(struct flow_action *flow_action)
3490
+{
3491
+ struct flow_action_entry *entry;
3492
+ int i;
3493
+
3494
+ flow_action_for_each(i, entry, flow_action) {
3495
+ tcf_act_put_cookie(entry);
3496
+ if (entry->destructor)
3497
+ entry->destructor(entry->destructor_priv);
3498
+ }
3499
+}
3500
+EXPORT_SYMBOL(tc_cleanup_flow_action);
3501
+
3502
+static void tcf_mirred_get_dev(struct flow_action_entry *entry,
3503
+ const struct tc_action *act)
3504
+{
3505
+#ifdef CONFIG_NET_CLS_ACT
3506
+ entry->dev = act->ops->get_dev(act, &entry->destructor);
3507
+ if (!entry->dev)
3508
+ return;
3509
+ entry->destructor_priv = entry->dev;
3510
+#endif
3511
+}
3512
+
3513
+static void tcf_tunnel_encap_put_tunnel(void *priv)
3514
+{
3515
+ struct ip_tunnel_info *tunnel = priv;
3516
+
3517
+ kfree(tunnel);
3518
+}
3519
+
3520
+static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
3521
+ const struct tc_action *act)
3522
+{
3523
+ entry->tunnel = tcf_tunnel_info_copy(act);
3524
+ if (!entry->tunnel)
3525
+ return -ENOMEM;
3526
+ entry->destructor = tcf_tunnel_encap_put_tunnel;
3527
+ entry->destructor_priv = entry->tunnel;
3528
+ return 0;
3529
+}
3530
+
3531
+static void tcf_sample_get_group(struct flow_action_entry *entry,
3532
+ const struct tc_action *act)
3533
+{
3534
+#ifdef CONFIG_NET_CLS_ACT
3535
+ entry->sample.psample_group =
3536
+ act->ops->get_psample_group(act, &entry->destructor);
3537
+ entry->destructor_priv = entry->sample.psample_group;
3538
+#endif
3539
+}
3540
+
3541
+static void tcf_gate_entry_destructor(void *priv)
3542
+{
3543
+ struct action_gate_entry *oe = priv;
3544
+
3545
+ kfree(oe);
3546
+}
3547
+
3548
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
3549
+ const struct tc_action *act)
3550
+{
3551
+ entry->gate.entries = tcf_gate_get_list(act);
3552
+
3553
+ if (!entry->gate.entries)
3554
+ return -EINVAL;
3555
+
3556
+ entry->destructor = tcf_gate_entry_destructor;
3557
+ entry->destructor_priv = entry->gate.entries;
3558
+
3559
+ return 0;
3560
+}
3561
+
3562
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
3563
+{
3564
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
3565
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
3566
+ else if (!hw_stats)
3567
+ return FLOW_ACTION_HW_STATS_DISABLED;
3568
+
3569
+ return hw_stats;
3570
+}
3571
+
3572
+int tc_setup_flow_action(struct flow_action *flow_action,
3573
+ const struct tcf_exts *exts)
3574
+{
3575
+ struct tc_action *act;
3576
+ int i, j, k, err = 0;
3577
+
3578
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
3579
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
3580
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
3581
+
3582
+ if (!exts)
3583
+ return 0;
3584
+
3585
+ j = 0;
3586
+ tcf_exts_for_each_action(i, act, exts) {
3587
+ struct flow_action_entry *entry;
3588
+
3589
+ entry = &flow_action->entries[j];
3590
+ spin_lock_bh(&act->tcfa_lock);
3591
+ err = tcf_act_get_cookie(entry, act);
3592
+ if (err)
3593
+ goto err_out_locked;
3594
+
3595
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3596
+
3597
+ if (is_tcf_gact_ok(act)) {
3598
+ entry->id = FLOW_ACTION_ACCEPT;
3599
+ } else if (is_tcf_gact_shot(act)) {
3600
+ entry->id = FLOW_ACTION_DROP;
3601
+ } else if (is_tcf_gact_trap(act)) {
3602
+ entry->id = FLOW_ACTION_TRAP;
3603
+ } else if (is_tcf_gact_goto_chain(act)) {
3604
+ entry->id = FLOW_ACTION_GOTO;
3605
+ entry->chain_index = tcf_gact_goto_chain_index(act);
3606
+ } else if (is_tcf_mirred_egress_redirect(act)) {
3607
+ entry->id = FLOW_ACTION_REDIRECT;
3608
+ tcf_mirred_get_dev(entry, act);
3609
+ } else if (is_tcf_mirred_egress_mirror(act)) {
3610
+ entry->id = FLOW_ACTION_MIRRED;
3611
+ tcf_mirred_get_dev(entry, act);
3612
+ } else if (is_tcf_mirred_ingress_redirect(act)) {
3613
+ entry->id = FLOW_ACTION_REDIRECT_INGRESS;
3614
+ tcf_mirred_get_dev(entry, act);
3615
+ } else if (is_tcf_mirred_ingress_mirror(act)) {
3616
+ entry->id = FLOW_ACTION_MIRRED_INGRESS;
3617
+ tcf_mirred_get_dev(entry, act);
3618
+ } else if (is_tcf_vlan(act)) {
3619
+ switch (tcf_vlan_action(act)) {
3620
+ case TCA_VLAN_ACT_PUSH:
3621
+ entry->id = FLOW_ACTION_VLAN_PUSH;
3622
+ entry->vlan.vid = tcf_vlan_push_vid(act);
3623
+ entry->vlan.proto = tcf_vlan_push_proto(act);
3624
+ entry->vlan.prio = tcf_vlan_push_prio(act);
3625
+ break;
3626
+ case TCA_VLAN_ACT_POP:
3627
+ entry->id = FLOW_ACTION_VLAN_POP;
3628
+ break;
3629
+ case TCA_VLAN_ACT_MODIFY:
3630
+ entry->id = FLOW_ACTION_VLAN_MANGLE;
3631
+ entry->vlan.vid = tcf_vlan_push_vid(act);
3632
+ entry->vlan.proto = tcf_vlan_push_proto(act);
3633
+ entry->vlan.prio = tcf_vlan_push_prio(act);
3634
+ break;
3635
+ default:
3636
+ err = -EOPNOTSUPP;
3637
+ goto err_out_locked;
3638
+ }
3639
+ } else if (is_tcf_tunnel_set(act)) {
3640
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
3641
+ err = tcf_tunnel_encap_get_tunnel(entry, act);
3642
+ if (err)
3643
+ goto err_out_locked;
3644
+ } else if (is_tcf_tunnel_release(act)) {
3645
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
3646
+ } else if (is_tcf_pedit(act)) {
3647
+ for (k = 0; k < tcf_pedit_nkeys(act); k++) {
3648
+ switch (tcf_pedit_cmd(act, k)) {
3649
+ case TCA_PEDIT_KEY_EX_CMD_SET:
3650
+ entry->id = FLOW_ACTION_MANGLE;
3651
+ break;
3652
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
3653
+ entry->id = FLOW_ACTION_ADD;
3654
+ break;
3655
+ default:
3656
+ err = -EOPNOTSUPP;
3657
+ goto err_out_locked;
3658
+ }
3659
+ entry->mangle.htype = tcf_pedit_htype(act, k);
3660
+ entry->mangle.mask = tcf_pedit_mask(act, k);
3661
+ entry->mangle.val = tcf_pedit_val(act, k);
3662
+ entry->mangle.offset = tcf_pedit_offset(act, k);
3663
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3664
+ entry = &flow_action->entries[++j];
3665
+ }
3666
+ } else if (is_tcf_csum(act)) {
3667
+ entry->id = FLOW_ACTION_CSUM;
3668
+ entry->csum_flags = tcf_csum_update_flags(act);
3669
+ } else if (is_tcf_skbedit_mark(act)) {
3670
+ entry->id = FLOW_ACTION_MARK;
3671
+ entry->mark = tcf_skbedit_mark(act);
3672
+ } else if (is_tcf_sample(act)) {
3673
+ entry->id = FLOW_ACTION_SAMPLE;
3674
+ entry->sample.trunc_size = tcf_sample_trunc_size(act);
3675
+ entry->sample.truncate = tcf_sample_truncate(act);
3676
+ entry->sample.rate = tcf_sample_rate(act);
3677
+ tcf_sample_get_group(entry, act);
3678
+ } else if (is_tcf_police(act)) {
3679
+ entry->id = FLOW_ACTION_POLICE;
3680
+ entry->police.burst = tcf_police_burst(act);
3681
+ entry->police.rate_bytes_ps =
3682
+ tcf_police_rate_bytes_ps(act);
3683
+ entry->police.mtu = tcf_police_tcfp_mtu(act);
3684
+ entry->police.index = act->tcfa_index;
3685
+ } else if (is_tcf_ct(act)) {
3686
+ entry->id = FLOW_ACTION_CT;
3687
+ entry->ct.action = tcf_ct_action(act);
3688
+ entry->ct.zone = tcf_ct_zone(act);
3689
+ entry->ct.flow_table = tcf_ct_ft(act);
3690
+ } else if (is_tcf_mpls(act)) {
3691
+ switch (tcf_mpls_action(act)) {
3692
+ case TCA_MPLS_ACT_PUSH:
3693
+ entry->id = FLOW_ACTION_MPLS_PUSH;
3694
+ entry->mpls_push.proto = tcf_mpls_proto(act);
3695
+ entry->mpls_push.label = tcf_mpls_label(act);
3696
+ entry->mpls_push.tc = tcf_mpls_tc(act);
3697
+ entry->mpls_push.bos = tcf_mpls_bos(act);
3698
+ entry->mpls_push.ttl = tcf_mpls_ttl(act);
3699
+ break;
3700
+ case TCA_MPLS_ACT_POP:
3701
+ entry->id = FLOW_ACTION_MPLS_POP;
3702
+ entry->mpls_pop.proto = tcf_mpls_proto(act);
3703
+ break;
3704
+ case TCA_MPLS_ACT_MODIFY:
3705
+ entry->id = FLOW_ACTION_MPLS_MANGLE;
3706
+ entry->mpls_mangle.label = tcf_mpls_label(act);
3707
+ entry->mpls_mangle.tc = tcf_mpls_tc(act);
3708
+ entry->mpls_mangle.bos = tcf_mpls_bos(act);
3709
+ entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
3710
+ break;
3711
+ default:
3712
+ err = -EOPNOTSUPP;
3713
+ goto err_out_locked;
3714
+ }
+		} else if (is_tcf_skbedit_ptype(act)) {
+			entry->id = FLOW_ACTION_PTYPE;
+			entry->ptype = tcf_skbedit_ptype(act);
+		} else if (is_tcf_skbedit_priority(act)) {
+			entry->id = FLOW_ACTION_PRIORITY;
+			entry->priority = tcf_skbedit_priority(act);
+		} else if (is_tcf_gate(act)) {
+			entry->id = FLOW_ACTION_GATE;
+			entry->gate.index = tcf_gate_index(act);
+			entry->gate.prio = tcf_gate_prio(act);
+			entry->gate.basetime = tcf_gate_basetime(act);
+			entry->gate.cycletime = tcf_gate_cycletime(act);
+			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+			entry->gate.num_entries = tcf_gate_num_entries(act);
+			err = tcf_gate_get_entries(entry, act);
+			if (err)
+				goto err_out_locked;
+		} else {
+			err = -EOPNOTSUPP;
+			goto err_out_locked;
+		}
+		spin_unlock_bh(&act->tcfa_lock);
+
+		if (!is_tcf_pedit(act))
+			j++;
+	}
+
+err_out:
+	if (err)
+		tc_cleanup_flow_action(flow_action);
+
+	return err;
+err_out_locked:
+	spin_unlock_bh(&act->tcfa_lock);
+	goto err_out;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
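The intended calling sequence for these helpers is: size the entries array with tcf_exts_num_actions() (added just below), allocate it with flow_rule_alloc(), fill it with tc_setup_flow_action(), and drop the per-entry references (tunnel info, sample group, gate entries) with tc_cleanup_flow_action() once the driver callback has run. A minimal caller sketch, loosely modeled on the cls_flower hardware-replace path; the example_* name is illustrative and not part of this patch:

	static int example_offload_exts(struct tcf_exts *exts)
	{
		struct flow_rule *rule;
		int err;

		rule = flow_rule_alloc(tcf_exts_num_actions(exts));
		if (!rule)
			return -ENOMEM;

		err = tc_setup_flow_action(&rule->action, exts);
		if (err)
			goto out;	/* cleanup already done internally */

		/* ... hand rule to the driver, e.g. via tc_setup_cb_call() ... */

		tc_cleanup_flow_action(&rule->action);
	out:
		kfree(rule);
		return err;
	}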
+
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+	unsigned int num_acts = 0;
+	struct tc_action *act;
+	int i;
+
+	tcf_exts_for_each_action(i, act, exts) {
+		if (is_tcf_pedit(act))
+			num_acts += tcf_pedit_nkeys(act);
+		else
+			num_acts++;
+	}
+	return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
+					u32 *p_block_index,
+					struct netlink_ext_ack *extack)
+{
+	*p_block_index = nla_get_u32(block_index_attr);
+	if (!*p_block_index) {
+		NL_SET_ERR_MSG(extack, "Block number may not be zero");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+		    enum flow_block_binder_type binder_type,
+		    struct nlattr *block_index_attr,
+		    struct netlink_ext_ack *extack)
+{
+	u32 block_index;
+	int err;
+
+	if (!block_index_attr)
+		return 0;
+
+	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+	if (err)
+		return err;
+
+	if (!block_index)
+		return 0;
+
+	qe->info.binder_type = binder_type;
+	qe->info.chain_head_change = tcf_chain_head_change_dflt;
+	qe->info.chain_head_change_priv = &qe->filter_chain;
+	qe->info.block_index = block_index;
+
+	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
+}
+EXPORT_SYMBOL(tcf_qevent_init);
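Note that tcf_qevent_parse_block_index() already rejects a zero index, so the second "if (!block_index)" check in tcf_qevent_init() cannot trigger; harmless, but it could be dropped. A qdisc binds a qevent block from its init path; a sketch modeled on sch_red's early_drop qevent (struct red_sched_data, qe_early_drop and TCA_RED_EARLY_DROP_BLOCK belong to sch_red, not to this file):

	static int example_red_init_qevent(struct Qdisc *sch, struct nlattr **tb,
					   struct netlink_ext_ack *extack)
	{
		struct red_sched_data *q = qdisc_priv(sch);

		/* No-op when the attribute is absent; the qevent then stays
		 * disabled and the other tcf_qevent_*() helpers return early.
		 */
		return tcf_qevent_init(&q->qe_early_drop, sch,
				       FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
				       tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	}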
+
+void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
+{
+	if (qe->info.block_index)
+		tcf_block_put_ext(qe->block, sch, &qe->info);
+}
+EXPORT_SYMBOL(tcf_qevent_destroy);
+
+int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+			       struct netlink_ext_ack *extack)
+{
+	u32 block_index;
+	int err;
+
+	if (!block_index_attr)
+		return 0;
+
+	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+	if (err)
+		return err;
+
+	/* Bounce newly-configured block or change in block. */
+	if (block_index != qe->info.block_index) {
+		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tcf_qevent_validate_change);
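A change() handler is expected to call this before committing any new configuration, since a qevent block can be neither attached late nor swapped. Sketch, with the same illustrative sch_red-style names as above:

	static int example_red_change_qevent(struct Qdisc *sch, struct nlattr **tb,
					     struct netlink_ext_ack *extack)
	{
		struct red_sched_data *q = qdisc_priv(sch);

		return tcf_qevent_validate_change(&q->qe_early_drop,
						  tb[TCA_RED_EARLY_DROP_BLOCK],
						  extack);
	}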
+
+struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+				  struct sk_buff **to_free, int *ret)
+{
+	struct tcf_result cl_res;
+	struct tcf_proto *fl;
+
+	if (!qe->info.block_index)
+		return skb;
+
+	fl = rcu_dereference_bh(qe->filter_chain);
+
+	switch (tcf_classify(skb, fl, &cl_res, false)) {
+	case TC_ACT_SHOT:
+		qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		*ret = __NET_XMIT_BYPASS;
+		return NULL;
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
+		__qdisc_drop(skb, to_free);
+		*ret = __NET_XMIT_STOLEN;
+		return NULL;
+	case TC_ACT_REDIRECT:
+		skb_do_redirect(skb);
+		*ret = __NET_XMIT_STOLEN;
+		return NULL;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(tcf_qevent_handle);
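A NULL return means the packet was consumed (dropped, stolen or redirected) and *ret holds the verdict to propagate; the caller must not touch the skb afterwards. Hot-path sketch, again sch_red-style; returning NET_XMIT_CN | ret mirrors how sch_red reports a qevent-consumed packet:

	static int example_enqueue_early_drop(struct sk_buff *skb,
					      struct Qdisc *sch,
					      struct sk_buff **to_free)
	{
		struct red_sched_data *q = qdisc_priv(sch);
		int ret;

		skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb,
					to_free, &ret);
		if (!skb)
			return NET_XMIT_CN | ret;

		/* ... otherwise fall through to the normal drop path ... */
		return qdisc_drop(skb, sch, to_free);
	}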
+
+int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
+{
+	if (!qe->info.block_index)
+		return 0;
+	return nla_put_u32(skb, attr_name, qe->info.block_index);
+}
+EXPORT_SYMBOL(tcf_qevent_dump);
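The matching dump and teardown hooks are one-liners. tcf_qevent_dump() returns 0 when no block is bound, so it can be called unconditionally from the qdisc's dump callback (same illustrative naming):

	static int example_red_dump_qevent(struct Qdisc *sch, struct sk_buff *skb)
	{
		struct red_sched_data *q = qdisc_priv(sch);

		return tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK,
				       &q->qe_early_drop);
	}

	static void example_red_destroy_qevent(struct Qdisc *sch)
	{
		struct red_sched_data *q = qdisc_priv(sch);

		tcf_qevent_destroy(&q->qe_early_drop, sch);
	}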
+#endif
 
 static __net_init int tcf_net_init(struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
 
+	spin_lock_init(&tn->idr_lock);
 	idr_init(&tn->idr);
 	return 0;
 }
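The new tn->idr_lock is what lets block index allocation and lookup run without RTNL once the tfilter handlers are registered unlocked below. For reference, the insertion pattern it enables, modeled on the tcf_block_insert() change earlier in this patch (standalone sketch, not new code in cls_api.c):

	static int example_block_index_alloc(struct tcf_net *tn,
					     struct tcf_block *block)
	{
		int err;

		idr_preload(GFP_KERNEL);
		spin_lock(&tn->idr_lock);
		err = idr_alloc_u32(&tn->idr, block, &block->index,
				    block->index, GFP_NOWAIT);
		spin_unlock(&tn->idr_lock);
		idr_preload_end();

		return err;
	}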
@@ -2301,10 +3914,12 @@
 	if (err)
 		goto err_register_pernet_subsys;
 
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
-		      tc_dump_tfilter, 0);
+		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
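Only the three tfilter doit handlers gain RTNL_FLAG_DOIT_UNLOCKED: with that flag rtnetlink dispatches the request without taking the RTNL mutex, so tc_new_tfilter(), tc_del_tfilter() and tc_get_tfilter() must take it themselves whenever a qdisc, classifier or driver still requires it, which is what the rtnl_held bookkeeping threaded through those handlers is for. The chain handlers just below keep the default locked dispatch.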
 	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,