2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/net/sched/cls_api.c
@@ -1,17 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * net/sched/cls_api.c	Packet classifier API.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * Changes:
  *
  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
- *
  */

 #include <linux/module.h>
@@ -25,11 +20,26 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/jhash.h>
+#include <linux/rculist.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_police.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
+#include <net/flow_offload.h>

 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

@@ -38,6 +48,62 @@

 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
+
+static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
+{
+	return jhash_3words(tp->chain->index, tp->prio,
+			    (__force __u32)tp->protocol, 0);
+}
+
+static void tcf_proto_signal_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
+		     destroy_obj_hashfn(tp));
+	mutex_unlock(&block->proto_destroy_lock);
+}
+
+static bool tcf_proto_cmp(const struct tcf_proto *tp1,
+			  const struct tcf_proto *tp2)
+{
+	return tp1->chain->index == tp2->chain->index &&
+	       tp1->prio == tp2->prio &&
+	       tp1->protocol == tp2->protocol;
+}
+
+static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
+					struct tcf_proto *tp)
+{
+	u32 hash = destroy_obj_hashfn(tp);
+	struct tcf_proto *iter;
+	bool found = false;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
+				   destroy_ht_node, hash) {
+		if (tcf_proto_cmp(tp, iter)) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+static void
+tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_lock(&block->proto_destroy_lock);
+	if (hash_hashed(&tp->destroy_ht_node))
+		hash_del_rcu(&tp->destroy_ht_node);
+	mutex_unlock(&block->proto_destroy_lock);
+}

 /* Find classifier type by string name */

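Note: the helpers added above implement the "destroy signaling" scheme used later in this patch. When a classifier instance starts dying, tcf_proto_signal_destroying() keys it into the per-block proto_destroy_ht hashtable by a jhash_3words() of its (chain index, priority, protocol) triple; tcf_proto_signal_destroyed() removes it once teardown finishes. Insert paths consult the table so they never attach a new instance that aliases one still being destroyed. A condensed sketch of the consumer side, taken from the tcf_chain_tp_insert_unique() hunk later in this diff:

	mutex_lock(&chain->filter_chain_lock);
	if (tcf_proto_exists_destroying(chain, tp_new)) {
		/* An identical (chain, prio, protocol) instance is still
		 * dying; back off and let the caller replay the request.
		 */
		mutex_unlock(&chain->filter_chain_lock);
		return ERR_PTR(-EAGAIN);
	}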
@@ -60,7 +126,8 @@
 }

 static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	const struct tcf_proto_ops *ops;

@@ -68,9 +135,11 @@
 	if (ops)
 		return ops;
 #ifdef CONFIG_MODULES
-	rtnl_unlock();
+	if (rtnl_held)
+		rtnl_unlock();
 	request_module("cls_%s", kind);
-	rtnl_lock();
+	if (rtnl_held)
+		rtnl_lock();
 	ops = __tcf_proto_lookup_ops(kind);
 	/* We dropped the RTNL semaphore in order to perform
 	 * the module load. So, even if we succeeded in loading
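Note: request_module() may sleep, so the old code unconditionally dropped and re-took RTNL around the module load. With the new rtnl_held argument the same helper also serves callers that never took RTNL (the unlocked classifier paths this series introduces), in which case neither rtnl_unlock() nor rtnl_lock() may run. The two call patterns, as an illustrative sketch ("flower" is just an example classifier kind):

	/* rtnl-locked caller: the lock is dropped around the module load */
	ops = tcf_proto_lookup_ops("flower", true, extack);

	/* unlocked caller (e.g. tcf_proto_is_unlocked() in the next hunk):
	 * the RTNL lock must not be touched at all
	 */
	ops = tcf_proto_lookup_ops("flower", false, NULL);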
@@ -151,8 +220,37 @@
 	return TC_H_MAJ(first);
 }

+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{
+	if (kind)
+		return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
+	memset(name, 0, IFNAMSIZ);
+	return false;
+}
+
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+	const struct tcf_proto_ops *ops;
+	bool ret;
+
+	if (strlen(kind) == 0)
+		return false;
+
+	ops = tcf_proto_lookup_ops(kind, false, NULL);
+	/* On error return false to take rtnl lock. Proto lookup/create
+	 * functions will perform lookup again and properly handle errors.
+	 */
+	if (IS_ERR(ops))
+		return false;
+
+	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+	module_put(ops->owner);
+	return ret;
+}
+
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
+					  bool rtnl_held,
					  struct netlink_ext_ack *extack)
 {
 	struct tcf_proto *tp;
@@ -162,7 +260,7 @@
 	if (!tp)
 		return ERR_PTR(-ENOBUFS);

-	tp->ops = tcf_proto_lookup_ops(kind, extack);
+	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
 	if (IS_ERR(tp->ops)) {
 		err = PTR_ERR(tp->ops);
 		goto errout;
@@ -171,6 +269,8 @@
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	spin_lock_init(&tp->lock);
+	refcount_set(&tp->refcnt, 1);

 	err = tp->ops->init(tp);
 	if (err) {
@@ -184,13 +284,60 @@
 	return ERR_PTR(err);
 }

-static void tcf_proto_destroy(struct tcf_proto *tp,
-			      struct netlink_ext_ack *extack)
+static void tcf_proto_get(struct tcf_proto *tp)
 {
-	tp->ops->destroy(tp, extack);
+	refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
+			      bool sig_destroy, struct netlink_ext_ack *extack)
+{
+	tp->ops->destroy(tp, rtnl_held, extack);
+	if (sig_destroy)
+		tcf_proto_signal_destroyed(tp->chain, tp);
+	tcf_chain_put(tp->chain);
 	module_put(tp->ops->owner);
 	kfree_rcu(tp, rcu);
 }
+
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
+{
+	if (refcount_dec_and_test(&tp->refcnt))
+		tcf_proto_destroy(tp, rtnl_held, true, extack);
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
+{
+	if (tp->ops->delete_empty)
+		return tp->ops->delete_empty(tp);
+
+	tp->deleting = true;
+	return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	tp->deleting = true;
+	spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+	bool deleting;
+
+	spin_lock(&tp->lock);
+	deleting = tp->deleting;
+	spin_unlock(&tp->lock);
+
+	return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block) \
+	lockdep_assert_held(&(block)->lock)

 struct tcf_filter_chain_list_item {
 	struct list_head list;
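Note: with the hunk above, struct tcf_proto becomes reference counted, so a classifier instance can be used by several concurrent rule-update requests and is destroyed only on the final tcf_proto_put(). A simplified sketch of the resulting usage pattern (tcf_chain_tp_find(), shown further down, returns the tp already referenced):

	tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false);
	if (tp) {
		/* tp is guaranteed alive here, even after every lock is
		 * dropped, until the reference is released:
		 */
		tcf_proto_put(tp, rtnl_held, NULL);	/* may free tp */
	}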
@@ -203,10 +350,13 @@
 {
 	struct tcf_chain *chain;

+	ASSERT_BLOCK_LOCKED(block);
+
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
-	list_add_tail(&chain->list, &block->chain_list);
+	list_add_tail_rcu(&chain->list, &block->chain_list);
+	mutex_init(&chain->filter_chain_lock);
 	chain->block = block;
 	chain->index = chain_index;
 	chain->refcnt = 1;
@@ -230,29 +380,60 @@

 	if (chain->index)
 		return;
+
+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
 		tcf_chain_head_change_item(item, tp_head);
+	mutex_unlock(&block->lock);
 }

-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;

-	list_del(&chain->list);
+	ASSERT_BLOCK_LOCKED(block);
+
+	list_del_rcu(&chain->list);
 	if (!chain->index)
 		block->chain0.chain = NULL;
-	kfree(chain);
-	if (list_empty(&block->chain_list) && block->refcnt == 0)
-		kfree(block);
+
+	if (list_empty(&block->chain_list) &&
+	    refcount_read(&block->refcnt) == 0)
+		return true;
+
+	return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+	mutex_destroy(&block->lock);
+	mutex_destroy(&block->proto_destroy_lock);
+	kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+	struct tcf_block *block = chain->block;
+
+	mutex_destroy(&chain->filter_chain_lock);
+	kfree_rcu(chain, rcu);
+	if (free_block)
+		tcf_block_destroy(block);
 }

 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	++chain->refcnt;
 }

 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	/* In case all the references are action references, this
 	 * chain should not be shown to the user.
 	 */
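Note: chain teardown is now two-phase. tcf_chain_detach() only unlinks the chain while block->lock is held and reports whether the block itself became freeable; the actual freeing in tcf_chain_destroy()/tcf_block_destroy() happens after the lock is dropped and goes through kfree_rcu(), which keeps lockless readers such as tcf_chain_lookup_rcu() safe. Condensed caller-side sequence (the full version is __tcf_chain_put() below):

	mutex_lock(&block->lock);
	/* ... drop the last chain reference ... */
	free_block = tcf_chain_detach(chain);	/* unlink under the lock */
	mutex_unlock(&block->lock);

	tcf_chain_destroy(chain, free_block);	/* kfree_rcu() outside it */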
@@ -264,12 +445,28 @@
 {
 	struct tcf_chain *chain;

+	ASSERT_BLOCK_LOCKED(block);
+
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index)
 			return chain;
 	}
 	return NULL;
 }
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
+					      u32 chain_index)
+{
+	struct tcf_chain *chain;
+
+	list_for_each_entry_rcu(chain, &block->chain_list, list) {
+		if (chain->index == chain_index)
+			return chain;
+	}
+	return NULL;
+}
+#endif

 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast);
@@ -278,30 +475,39 @@
					  u32 chain_index, bool create,
					  bool by_act)
 {
-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+	struct tcf_chain *chain = NULL;
+	bool is_first_reference;

+	mutex_lock(&block->lock);
+	chain = tcf_chain_lookup(block, chain_index);
 	if (chain) {
 		tcf_chain_hold(chain);
 	} else {
 		if (!create)
-			return NULL;
+			goto errout;
 		chain = tcf_chain_create(block, chain_index);
 		if (!chain)
-			return NULL;
+			goto errout;
 	}

 	if (by_act)
 		++chain->action_refcnt;
+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+	mutex_unlock(&block->lock);

 	/* Send notification only in case we got the first
 	 * non-action reference. Until then, the chain acts only as
 	 * a placeholder for actions pointing to it and user ought
 	 * not know about them.
 	 */
-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+	if (is_first_reference && !by_act)
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

+	return chain;
+
+errout:
+	mutex_unlock(&block->lock);
 	return chain;
 }

@@ -317,72 +523,180 @@
 }
 EXPORT_SYMBOL(tcf_chain_get_by_act);

-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+			       void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+				  void *tmplt_priv, u32 chain_index,
+				  struct tcf_block *block, struct sk_buff *oskb,
+				  u32 seq, u16 flags, bool unicast);

-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+			    bool explicitly_created)
 {
+	struct tcf_block *block = chain->block;
+	const struct tcf_proto_ops *tmplt_ops;
+	bool free_block = false;
+	unsigned int refcnt;
+	void *tmplt_priv;
+
+	mutex_lock(&block->lock);
+	if (explicitly_created) {
+		if (!chain->explicitly_created) {
+			mutex_unlock(&block->lock);
+			return;
+		}
+		chain->explicitly_created = false;
+	}
+
 	if (by_act)
 		chain->action_refcnt--;
-	chain->refcnt--;
+
+	/* tc_chain_notify_delete can't be called while holding block lock.
+	 * However, when block is unlocked chain can be changed concurrently, so
+	 * save these to temporary variables.
+	 */
+	refcnt = --chain->refcnt;
+	tmplt_ops = chain->tmplt_ops;
+	tmplt_priv = chain->tmplt_priv;

 	/* The last dropped non-action reference will trigger notification. */
-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
-		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+	if (refcnt - chain->action_refcnt == 0 && !by_act) {
+		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
+				       block, NULL, 0, 0, false);
+		/* Last reference to chain, no need to lock. */
+		chain->flushing = false;
+	}

-	if (chain->refcnt == 0) {
-		tc_chain_tmplt_del(chain);
-		tcf_chain_destroy(chain);
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
+
+	if (refcnt == 0) {
+		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+		tcf_chain_destroy(chain, free_block);
 	}
 }

 static void tcf_chain_put(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, false);
+	__tcf_chain_put(chain, false, false);
 }

 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, true);
+	__tcf_chain_put(chain, true, false);
 }
 EXPORT_SYMBOL(tcf_chain_put_by_act);

 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
-	if (chain->explicitly_created)
-		tcf_chain_put(chain);
+	__tcf_chain_put(chain, false, true);
 }

-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 {
-	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+	struct tcf_proto *tp, *tp_next;

-	tcf_chain0_head_change(chain, NULL);
+	mutex_lock(&chain->filter_chain_lock);
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
 	while (tp) {
-		RCU_INIT_POINTER(chain->filter_chain, tp->next);
-		tcf_proto_destroy(tp, NULL);
-		tp = rtnl_dereference(chain->filter_chain);
-		tcf_chain_put(chain);
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_signal_destroying(chain, tp);
+		tp = tp_next;
 	}
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
+	RCU_INIT_POINTER(chain->filter_chain, NULL);
+	tcf_chain0_head_change(chain, NULL);
+	chain->flushing = true;
+	mutex_unlock(&chain->filter_chain_lock);
+
+	while (tp) {
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_put(tp, rtnl_held, NULL);
+		tp = tp_next;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo);
+
+static void tcf_block_offload_init(struct flow_block_offload *bo,
+				   struct net_device *dev, struct Qdisc *sch,
+				   enum flow_block_command command,
+				   enum flow_block_binder_type binder_type,
+				   struct flow_block *flow_block,
+				   bool shared, struct netlink_ext_ack *extack)
+{
+	bo->net = dev_net(dev);
+	bo->command = command;
+	bo->binder_type = binder_type;
+	bo->block = flow_block;
+	bo->block_shared = shared;
+	bo->extack = extack;
+	bo->sch = sch;
+	bo->cb_list_head = &flow_block->cb_list;
+	INIT_LIST_HEAD(&bo->cb_list);
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo);
+
+static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
+{
+	struct tcf_block *block = block_cb->indr.data;
+	struct net_device *dev = block_cb->indr.dev;
+	struct Qdisc *sch = block_cb->indr.sch;
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
+			       block_cb->indr.binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       &extack);
+	rtnl_lock();
+	down_write(&block->cb_lock);
+	list_del(&block_cb->driver_list);
+	list_move(&block_cb->list, &bo.cb_list);
+	tcf_block_unbind(block, &bo);
+	up_write(&block->cb_lock);
+	rtnl_unlock();
 }

 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
-	return block->offloadcnt;
+	return atomic_read(&block->offloadcnt);
 }

 static int tcf_block_offload_cmd(struct tcf_block *block,
-				 struct net_device *dev,
+				 struct net_device *dev, struct Qdisc *sch,
				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command,
+				 enum flow_block_command command,
				 struct netlink_ext_ack *extack)
 {
-	struct tc_block_offload bo = {};
+	struct flow_block_offload bo = {};

-	bo.command = command;
-	bo.binder_type = ei->binder_type;
-	bo.block = block;
-	bo.extack = extack;
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       extack);
+
+	if (dev->netdev_ops->ndo_setup_tc) {
+		int err;
+
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+		if (err < 0) {
+			if (err != -EOPNOTSUPP)
+				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
+			return err;
+		}
+
+		return tcf_block_setup(block, &bo);
+	}
+
+	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
+				    tc_block_indr_cleanup);
+	tcf_block_setup(block, &bo);
+
+	return -EOPNOTSUPP;
 }

 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
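Note: tcf_block_offload_cmd() now drives both offload flavours through a single flow_block_offload descriptor. Drivers implementing ndo_setup_tc() are called directly with TC_SETUP_BLOCK; everything else is offered to indirect-offload drivers via flow_indr_dev_setup_offload() (with tc_block_indr_cleanup() as the teardown hook) while still returning -EOPNOTSUPP so the caller accounts the device as non-offloading. For reference, a minimal sketch of the driver side of this contract, with hypothetical foo_* names (not from this patch): a FLOW_BLOCK_BIND handler queues a flow_block_cb on bo->cb_list, and tcf_block_bind() later replays existing rules through it:

	static int foo_setup_block(struct flow_block_offload *bo, void *priv)
	{
		struct flow_block_cb *block_cb;

		if (bo->command != FLOW_BLOCK_BIND)
			return -EOPNOTSUPP;
		block_cb = flow_block_cb_alloc(foo_setup_cb, priv, priv, NULL);
		if (IS_ERR(block_cb))
			return PTR_ERR(block_cb);
		flow_block_cb_add(block_cb, bo);
		return 0;
	}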
@@ -392,27 +706,37 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;

-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_inc;
+	down_write(&block->cb_lock);

 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+	if (dev->netdev_ops->ndo_setup_tc &&
+	    !tc_can_offload(dev) &&
+	    tcf_block_offload_in_use(block)) {
 		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_unlock;
 	}

-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
+	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
-	return err;
+	if (err)
+		goto err_unlock;
+
+	up_write(&block->cb_lock);
+	return 0;

 no_offload_dev_inc:
 	if (tcf_block_offload_in_use(block))
-		return -EOPNOTSUPP;
+		goto err_unlock;
+
+	err = 0;
 	block->nooffloaddevcnt++;
-	return 0;
+err_unlock:
+	up_write(&block->cb_lock);
+	return err;
 }

 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -421,15 +745,16 @@
 	struct net_device *dev = q->dev_queue->dev;
 	int err;

-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
+	down_write(&block->cb_lock);
+	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
+	up_write(&block->cb_lock);
 	return;

 no_offload_dev_dec:
 	WARN_ON(block->nooffloaddevcnt-- == 0);
+	up_write(&block->cb_lock);
 }

 static int
@@ -437,8 +762,8 @@
				   struct tcf_block_ext_info *ei,
				   struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
+	struct tcf_chain *chain0;

 	item = kmalloc(sizeof(*item), GFP_KERNEL);
 	if (!item) {
@@ -447,9 +772,32 @@
 	}
 	item->chain_head_change = ei->chain_head_change;
 	item->chain_head_change_priv = ei->chain_head_change_priv;
-	if (chain0 && chain0->filter_chain)
-		tcf_chain_head_change_item(item, chain0->filter_chain);
-	list_add(&item->list, &block->chain0.filter_chain_list);
+
+	mutex_lock(&block->lock);
+	chain0 = block->chain0.chain;
+	if (chain0)
+		tcf_chain_hold(chain0);
+	else
+		list_add(&item->list, &block->chain0.filter_chain_list);
+	mutex_unlock(&block->lock);
+
+	if (chain0) {
+		struct tcf_proto *tp_head;
+
+		mutex_lock(&chain0->filter_chain_lock);
+
+		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+		if (tp_head)
+			tcf_chain_head_change_item(item, tp_head);
+
+		mutex_lock(&block->lock);
+		list_add(&item->list, &block->chain0.filter_chain_list);
+		mutex_unlock(&block->lock);
+
+		mutex_unlock(&chain0->filter_chain_lock);
+		tcf_chain_put(chain0);
+	}
+
 	return 0;
 }

@@ -457,24 +805,28 @@
 tcf_chain0_head_change_cb_del(struct tcf_block *block,
			      struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;

+	mutex_lock(&block->lock);
 	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
 		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
 		    (item->chain_head_change == ei->chain_head_change &&
 		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-			if (chain0)
+			if (block->chain0.chain)
				tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
+			mutex_unlock(&block->lock);
+
			kfree(item);
			return;
		}
 	}
+	mutex_unlock(&block->lock);
 	WARN_ON(1);
 }

 struct tcf_net {
+	spinlock_t idr_lock; /* Protects idr */
 	struct idr idr;
 };

@@ -484,16 +836,25 @@
				    struct netlink_ext_ack *extack)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
+	int err;

-	return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
-			     GFP_KERNEL);
+	idr_preload(GFP_KERNEL);
+	spin_lock(&tn->idr_lock);
+	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+			    GFP_NOWAIT);
+	spin_unlock(&tn->idr_lock);
+	idr_preload_end();
+
+	return err;
 }

 static void tcf_block_remove(struct tcf_block *block, struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);

+	spin_lock(&tn->idr_lock);
 	idr_remove(&tn->idr, block->index);
+	spin_unlock(&tn->idr_lock);
 }

 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
@@ -507,12 +868,15 @@
 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
 		return ERR_PTR(-ENOMEM);
 	}
+	mutex_init(&block->lock);
+	mutex_init(&block->proto_destroy_lock);
+	init_rwsem(&block->cb_lock);
+	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
-	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
 	INIT_LIST_HEAD(&block->chain0.filter_chain_list);

-	block->refcnt = 1;
+	refcount_set(&block->refcnt, 1);
 	block->net = net;
 	block->index = block_index;

@@ -529,6 +893,301 @@
 	return idr_find(&tn->idr, block_index);
 }

+static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
+{
+	struct tcf_block *block;
+
+	rcu_read_lock();
+	block = tcf_block_lookup(net, block_index);
+	if (block && !refcount_inc_not_zero(&block->refcnt))
+		block = NULL;
+	rcu_read_unlock();
+
+	return block;
+}
+
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	mutex_lock(&block->lock);
+	if (chain)
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+	else
+		chain = list_first_entry_or_null(&block->chain_list,
+						 struct tcf_chain, list);
+
+	/* skip all action-only chains */
+	while (chain && tcf_chain_held_by_acts_only(chain))
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+
+	if (chain)
+		tcf_chain_hold(chain);
+	mutex_unlock(&block->lock);
+
+	return chain;
+}
+
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
+{
+	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
+
+	if (chain)
+		tcf_chain_put(chain);
+
+	return chain_next;
+}
+EXPORT_SYMBOL(tcf_get_next_chain);
+
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	u32 prio = 0;
+
+	ASSERT_RTNL();
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (!tp) {
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+	} else if (tcf_proto_is_deleting(tp)) {
+		/* 'deleting' flag is set and chain->filter_chain_lock was
+		 * unlocked, which means next pointer could be invalid. Restart
+		 * search.
+		 */
+		prio = tp->prio + 1;
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+
+		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+			if (!tp->deleting && tp->prio >= prio)
+				break;
+	} else {
+		tp = tcf_chain_dereference(tp->next, chain);
+	}
+
+	if (tp)
+		tcf_proto_get(tp);
+
+	mutex_unlock(&chain->filter_chain_lock);
+
+	return tp;
+}
+
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+		   bool rtnl_held)
+{
+	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+	if (tp)
+		tcf_proto_put(tp, rtnl_held, NULL);
+
+	return tp_next;
+}
+EXPORT_SYMBOL(tcf_get_next_proto);
+
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+	struct tcf_chain *chain;
+
+	/* Last reference to block. At this point chains cannot be added or
+	 * removed concurrently.
+	 */
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
+		tcf_chain_put_explicitly_created(chain);
+		tcf_chain_flush(chain, rtnl_held);
+	}
+}
+
+/* Lookup Qdisc and increments its reference counter.
+ * Set parent, if necessary.
+ */
+
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+			    u32 *parent, int ifindex, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
+{
+	const struct Qdisc_class_ops *cops;
+	struct net_device *dev;
+	int err = 0;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	rcu_read_lock();
+
+	/* Find link */
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	/* Find qdisc */
+	if (!*parent) {
+		*q = rcu_dereference(dev->qdisc);
+		*parent = (*q)->handle;
+	} else {
+		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
+		if (!*q) {
+			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+			err = -EINVAL;
+			goto errout_rcu;
+		}
+	}
+
+	*q = qdisc_refcount_inc_nz(*q);
+	if (!*q) {
+		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+		err = -EINVAL;
+		goto errout_rcu;
+	}
+
+	/* Is it classful? */
+	cops = (*q)->ops->cl_ops;
+	if (!cops) {
+		NL_SET_ERR_MSG(extack, "Qdisc not classful");
+		err = -EINVAL;
+		goto errout_qdisc;
+	}
+
+	if (!cops->tcf_block) {
+		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+		err = -EOPNOTSUPP;
+		goto errout_qdisc;
+	}
+
+errout_rcu:
+	/* At this point we know that qdisc is not noop_qdisc,
+	 * which means that qdisc holds a reference to net_device
+	 * and we hold a reference to qdisc, so it is safe to release
+	 * rcu read lock.
+	 */
+	rcu_read_unlock();
+	return err;
+
+errout_qdisc:
+	rcu_read_unlock();
+
+	if (rtnl_held)
+		qdisc_put(*q);
+	else
+		qdisc_put_unlocked(*q);
+	*q = NULL;
+
+	return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+			       int ifindex, struct netlink_ext_ack *extack)
+{
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+		return 0;
+
+	/* Do we search for filter, attached to class? */
+	if (TC_H_MIN(parent)) {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		*cl = cops->find(q, parent);
+		if (*cl == 0) {
+			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+					  unsigned long cl, int ifindex,
+					  u32 block_index,
+					  struct netlink_ext_ack *extack)
+{
+	struct tcf_block *block;
+
+	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+		block = tcf_block_refcnt_get(net, block_index);
+		if (!block) {
+			NL_SET_ERR_MSG(extack, "Block of given index was not found");
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+		block = cops->tcf_block(q, cl, extack);
+		if (!block)
+			return ERR_PTR(-EINVAL);
+
+		if (tcf_block_shared(block)) {
+			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+
+		/* Always take reference to block in order to support execution
+		 * of rules update path of cls API without rtnl lock. Caller
+		 * must release block when it is finished using it. 'if' block
+		 * of this conditional obtain reference to block by calling
+		 * tcf_block_refcnt_get().
+		 */
+		refcount_inc(&block->refcnt);
+	}
+
+	return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+			    struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+		/* Flushing/putting all chains will cause the block to be
+		 * deallocated when last chain is freed. However, if chain_list
+		 * is empty, block has to be manually deallocated. After block
+		 * reference counter reached 0, it is no longer possible to
+		 * increment it or add new chains to block.
+		 */
+		bool free_block = list_empty(&block->chain_list);
+
+		mutex_unlock(&block->lock);
+		if (tcf_block_shared(block))
+			tcf_block_remove(block, block->net);
+
+		if (q)
+			tcf_block_offload_unbind(block, q, ei);
+
+		if (free_block)
+			tcf_block_destroy(block);
+		else
+			tcf_block_flush_all_chains(block, rtnl_held);
+	} else if (q) {
+		tcf_block_offload_unbind(block, q, ei);
+	}
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+	__tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
 /* Find tcf block.
  * Set q, parent, cl when appropriate.
  */
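Note: block lifetime now follows the standard RCU-plus-refcount idiom. Lookup succeeds only if refcount_inc_not_zero() wins against a concurrent release, and the final put uses refcount_dec_and_mutex_lock() so the zero transition happens with block->lock already held. The same idiom in generic form (illustrative sketch with a hypothetical object, not code from this patch):

	rcu_read_lock();
	obj = idr_find(&tn->idr, index);	/* lockless lookup */
	if (obj && !refcount_inc_not_zero(&obj->refcnt))
		obj = NULL;			/* lost the race, obj is dying */
	rcu_read_unlock();

	/* ... later, on release ... */
	if (refcount_dec_and_mutex_lock(&obj->refcnt, &obj->lock)) {
		/* last reference; tear down under the lock */
		mutex_unlock(&obj->lock);
		kfree_rcu(obj, rcu);	/* readers may still be inside RCU */
	}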
@@ -541,121 +1200,60 @@
 	struct tcf_block *block;
 	int err = 0;

-	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
-		block = tcf_block_lookup(net, block_index);
-		if (!block) {
-			NL_SET_ERR_MSG(extack, "Block of given index was not found");
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		const struct Qdisc_class_ops *cops;
-		struct net_device *dev;
+	ASSERT_RTNL();

-		rcu_read_lock();
+	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+	if (err)
+		goto errout;

-		/* Find link */
-		dev = dev_get_by_index_rcu(net, ifindex);
-		if (!dev) {
-			rcu_read_unlock();
-			return ERR_PTR(-ENODEV);
-		}
+	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+	if (err)
+		goto errout_qdisc;

-		/* Find qdisc */
-		if (!*parent) {
-			*q = dev->qdisc;
-			*parent = (*q)->handle;
-		} else {
-			*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
-			if (!*q) {
-				NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-				err = -EINVAL;
-				goto errout_rcu;
-			}
-		}
-
-		*q = qdisc_refcount_inc_nz(*q);
-		if (!*q) {
-			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
-			err = -EINVAL;
-			goto errout_rcu;
-		}
-
-		/* Is it classful? */
-		cops = (*q)->ops->cl_ops;
-		if (!cops) {
-			NL_SET_ERR_MSG(extack, "Qdisc not classful");
-			err = -EINVAL;
-			goto errout_rcu;
-		}
-
-		if (!cops->tcf_block) {
-			NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
-			err = -EOPNOTSUPP;
-			goto errout_rcu;
-		}
-
-		/* At this point we know that qdisc is not noop_qdisc,
-		 * which means that qdisc holds a reference to net_device
-		 * and we hold a reference to qdisc, so it is safe to release
-		 * rcu read lock.
-		 */
-		rcu_read_unlock();
-
-		/* Do we search for filter, attached to class? */
-		if (TC_H_MIN(*parent)) {
-			*cl = cops->find(*q, *parent);
-			if (*cl == 0) {
-				NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
-				err = -ENOENT;
-				goto errout_qdisc;
-			}
-		}
-
-		/* And the last stroke */
-		block = cops->tcf_block(*q, *cl, extack);
-		if (!block) {
-			err = -EINVAL;
-			goto errout_qdisc;
-		}
-		if (tcf_block_shared(block)) {
-			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
-			err = -EOPNOTSUPP;
-			goto errout_qdisc;
-		}
+	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+	if (IS_ERR(block)) {
+		err = PTR_ERR(block);
+		goto errout_qdisc;
 	}

 	return block;

-errout_rcu:
-	rcu_read_unlock();
 errout_qdisc:
-	if (*q) {
+	if (*q)
 		qdisc_put(*q);
-		*q = NULL;
-	}
+errout:
+	*q = NULL;
 	return ERR_PTR(err);
 }

-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+			      bool rtnl_held)
 {
-	if (q)
-		qdisc_put(q);
+	if (!IS_ERR_OR_NULL(block))
+		tcf_block_refcnt_put(block, rtnl_held);
+
+	if (q) {
+		if (rtnl_held)
+			qdisc_put(q);
+		else
+			qdisc_put_unlocked(q);
+	}
 }

 struct tcf_block_owner_item {
 	struct list_head list;
 	struct Qdisc *q;
-	enum tcf_block_binder_type binder_type;
+	enum flow_block_binder_type binder_type;
 };

 static void
 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	if (block->keep_dst &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
-	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		netif_keep_dst(qdisc_dev(q));
 }

@@ -672,7 +1270,7 @@

 static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
-			       enum tcf_block_binder_type binder_type)
+			       enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;

@@ -687,7 +1285,7 @@

 static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
-				enum tcf_block_binder_type binder_type)
+				enum flow_block_binder_type binder_type)
 {
 	struct tcf_block_owner_item *item;

@@ -707,21 +1305,16 @@
 {
 	struct net *net = qdisc_net(q);
 	struct tcf_block *block = NULL;
-	bool created = false;
 	int err;

-	if (ei->block_index) {
+	if (ei->block_index)
 		/* block_index not 0 means the shared block is requested */
-		block = tcf_block_lookup(net, ei->block_index);
-		if (block)
-			block->refcnt++;
-	}
+		block = tcf_block_refcnt_get(net, ei->block_index);

 	if (!block) {
 		block = tcf_block_create(net, q, ei->block_index, extack);
 		if (IS_ERR(block))
 			return PTR_ERR(block);
-		created = true;
 		if (tcf_block_shared(block)) {
 			err = tcf_block_insert(block, net, extack);
 			if (err)
@@ -751,14 +1344,8 @@
 err_chain0_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
-	if (created) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, net);
 err_block_insert:
-		kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	tcf_block_refcnt_put(block, true);
 	return err;
 }
 EXPORT_SYMBOL(tcf_block_get_ext);
@@ -790,42 +1377,12 @@
 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
 {
-	struct tcf_chain *chain, *tmp;
-
 	if (!block)
 		return;
 	tcf_chain0_head_change_cb_del(block, ei);
 	tcf_block_owner_del(block, q, ei->binder_type);

-	if (block->refcnt == 1) {
-		if (tcf_block_shared(block))
-			tcf_block_remove(block, block->net);
-
-		/* Hold a refcnt for all chains, so that they don't disappear
-		 * while we are iterating.
-		 */
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_hold(chain);
-
-		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_flush(chain);
-	}
-
-	tcf_block_offload_unbind(block, q, ei);
-
-	if (block->refcnt == 1) {
-		/* At this point, all the chains should have refcnt >= 1. */
-		list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-			tcf_chain_put_explicitly_created(chain);
-			tcf_chain_put(chain);
-		}
-
-		block->refcnt--;
-		if (list_empty(&block->chain_list))
-			kfree(block);
-	} else {
-		block->refcnt--;
-	}
+	__tcf_block_put(block, q, ei, true);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);

@@ -840,55 +1397,26 @@

 EXPORT_SYMBOL(tcf_block_put);

-struct tcf_block_cb {
-	struct list_head list;
-	tc_setup_cb_t *cb;
-	void *cb_ident;
-	void *cb_priv;
-	unsigned int refcnt;
-};
-
-void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
-{
-	return block_cb->cb_priv;
-}
-EXPORT_SYMBOL(tcf_block_cb_priv);
-
-struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
-					 tc_setup_cb_t *cb, void *cb_ident)
-{	struct tcf_block_cb *block_cb;
-
-	list_for_each_entry(block_cb, &block->cb_list, list)
-		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
-			return block_cb;
-	return NULL;
-}
-EXPORT_SYMBOL(tcf_block_cb_lookup);
-
-void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
-{
-	block_cb->refcnt++;
-}
-EXPORT_SYMBOL(tcf_block_cb_incref);
-
-unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
-{
-	return --block_cb->refcnt;
-}
-EXPORT_SYMBOL(tcf_block_cb_decref);
-
 static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain;
-	struct tcf_proto *tp;
+	struct tcf_chain *chain, *chain_prev;
+	struct tcf_proto *tp, *tp_prev;
 	int err;

-	list_for_each_entry(chain, &block->chain_list, list) {
-		for (tp = rtnl_dereference(chain->filter_chain); tp;
-		     tp = rtnl_dereference(tp->next)) {
+	lockdep_assert_held(&block->cb_lock);
+
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
+		for (tp = __tcf_get_next_proto(chain, NULL); tp;
+		     tp_prev = tp,
+			     tp = __tcf_get_next_proto(chain, tp),
+			     tcf_proto_put(tp_prev, true, NULL)) {
 			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
@@ -905,105 +1433,107 @@
 	return 0;

 err_playback_remove:
+	tcf_proto_put(tp, true, NULL);
+	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
 	return err;
 }

-struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
-					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv,
-					     struct netlink_ext_ack *extack)
+static int tcf_block_bind(struct tcf_block *block,
+			  struct flow_block_offload *bo)
 {
-	struct tcf_block_cb *block_cb;
-	int err;
+	struct flow_block_cb *block_cb, *next;
+	int err, i = 0;

-	/* Replay any already present rules */
-	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
-					  tcf_block_offload_in_use(block),
-					  extack);
-	if (err)
-		return ERR_PTR(err);
+	lockdep_assert_held(&block->cb_lock);

-	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
-	if (!block_cb)
-		return ERR_PTR(-ENOMEM);
-	block_cb->cb = cb;
-	block_cb->cb_ident = cb_ident;
-	block_cb->cb_priv = cb_priv;
-	list_add(&block_cb->list, &block->cb_list);
-	return block_cb;
-}
-EXPORT_SYMBOL(__tcf_block_cb_register);
+	list_for_each_entry(block_cb, &bo->cb_list, list) {
+		err = tcf_block_playback_offloads(block, block_cb->cb,
+						  block_cb->cb_priv, true,
+						  tcf_block_offload_in_use(block),
+						  bo->extack);
+		if (err)
+			goto err_unroll;
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt++;

-int tcf_block_cb_register(struct tcf_block *block,
-			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv, struct netlink_ext_ack *extack)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
-					   extack);
-	return PTR_ERR_OR_ZERO(block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_register);
-
-void __tcf_block_cb_unregister(struct tcf_block *block,
-			       struct tcf_block_cb *block_cb)
-{
-	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
-				    false, tcf_block_offload_in_use(block),
-				    NULL);
-	list_del(&block_cb->list);
-	kfree(block_cb);
-}
-EXPORT_SYMBOL(__tcf_block_cb_unregister);
-
-void tcf_block_cb_unregister(struct tcf_block *block,
-			     tc_setup_cb_t *cb, void *cb_ident)
-{
-	struct tcf_block_cb *block_cb;
-
-	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
-	if (!block_cb)
-		return;
-	__tcf_block_cb_unregister(block, block_cb);
-}
-EXPORT_SYMBOL(tcf_block_cb_unregister);
-
-static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
-			     void *type_data, bool err_stop)
-{
-	struct tcf_block_cb *block_cb;
-	int ok_count = 0;
-	int err;
-
-	/* Make sure all netdevs sharing this block are offload-capable. */
-	if (block->nooffloaddevcnt && err_stop)
-		return -EOPNOTSUPP;
-
-	list_for_each_entry(block_cb, &block->cb_list, list) {
-		err = block_cb->cb(type, type_data, block_cb->cb_priv);
-		if (err) {
-			if (err_stop)
-				return err;
-		} else {
-			ok_count++;
-		}
+		i++;
 	}
-	return ok_count;
+	list_splice(&bo->cb_list, &block->flow_block.cb_list);
+
+	return 0;
+
+err_unroll:
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		if (i-- > 0) {
+			list_del(&block_cb->list);
+			tcf_block_playback_offloads(block, block_cb->cb,
+						    block_cb->cb_priv, false,
+						    tcf_block_offload_in_use(block),
+						    NULL);
+			if (!bo->unlocked_driver_cb)
+				block->lockeddevcnt--;
+		}
+		flow_block_cb_free(block_cb);
+	}
+
+	return err;
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo)
+{
+	struct flow_block_cb *block_cb, *next;
+
+	lockdep_assert_held(&block->cb_lock);
+
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		tcf_block_playback_offloads(block, block_cb->cb,
+					    block_cb->cb_priv, false,
+					    tcf_block_offload_in_use(block),
+					    NULL);
+		list_del(&block_cb->list);
+		flow_block_cb_free(block_cb);
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt--;
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo)
+{
+	int err;
+
+	switch (bo->command) {
+	case FLOW_BLOCK_BIND:
+		err = tcf_block_bind(block, bo);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = 0;
+		tcf_block_unbind(block, bo);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
 }

 /* Main classifier routine: scans classifier chain attached
  * to this qdisc, (optionally) tests for protocol and asks
  * specific classifiers.
  */
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		 struct tcf_result *res, bool compat_mode)
+static inline int __tcf_classify(struct sk_buff *skb,
+				 const struct tcf_proto *tp,
+				 const struct tcf_proto *orig_tp,
+				 struct tcf_result *res,
+				 bool compat_mode,
+				 u32 *last_executed_chain)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	const int max_reclassify_loop = 4;
-	const struct tcf_proto *orig_tp = tp;
+	const int max_reclassify_loop = 16;
 	const struct tcf_proto *first_tp;
 	int limit = 0;

@@ -1021,9 +1551,11 @@
 #ifdef CONFIG_NET_CLS_ACT
 	if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
 		first_tp = orig_tp;
+		*last_executed_chain = first_tp->chain->index;
 		goto reset;
 	} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
 		first_tp = res->goto_tp;
+		*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
 		goto reset;
 	}
 #endif
@@ -1046,39 +1578,188 @@
 		goto reclassify;
 #endif
 }
+
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		 struct tcf_result *res, bool compat_mode)
+{
+	u32 last_executed_chain = 0;
+
+	return __tcf_classify(skb, tp, tp, res, compat_mode,
+			      &last_executed_chain);
+}
 EXPORT_SYMBOL(tcf_classify);
+
+int tcf_classify_ingress(struct sk_buff *skb,
+			 const struct tcf_block *ingress_block,
+			 const struct tcf_proto *tp,
+			 struct tcf_result *res, bool compat_mode)
+{
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	u32 last_executed_chain = 0;
+
+	return __tcf_classify(skb, tp, tp, res, compat_mode,
+			      &last_executed_chain);
+#else
+	u32 last_executed_chain = tp ? tp->chain->index : 0;
+	const struct tcf_proto *orig_tp = tp;
+	struct tc_skb_ext *ext;
+	int ret;
+
+	ext = skb_ext_find(skb, TC_SKB_EXT);
+
+	if (ext && ext->chain) {
+		struct tcf_chain *fchain;
+
+		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
+		if (!fchain)
+			return TC_ACT_SHOT;
+
+		/* Consume, so cloned/redirect skbs won't inherit ext */
+		skb_ext_del(skb, TC_SKB_EXT);
+
+		tp = rcu_dereference_bh(fchain->filter_chain);
+		last_executed_chain = fchain->index;
+	}
+
+	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
+			     &last_executed_chain);
+
+	/* If we missed on some chain */
+	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+		ext = tc_skb_ext_alloc(skb);
+		if (WARN_ON_ONCE(!ext))
+			return TC_ACT_SHOT;
+		ext->chain = last_executed_chain;
+		ext->mru = qdisc_skb_cb(skb)->mru;
+	}
+
+	return ret;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify_ingress);

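Note: with CONFIG_NET_TC_SKB_EXT, tcf_classify_ingress() makes classification restartable across passes. If a run ends in TC_ACT_UNSPEC after some chain executed, the chain index is saved in a TC_SKB_EXT skb extension; on a later pass (for instance after hardware only partially processed the packet and re-injected it) classification resumes from that chain instead of chain 0, and the extension is consumed so clones don't inherit it. An illustrative round trip (a sketch of the contract, not code from this patch):

	/* pass 1: misses after executing chain 42 */
	ret = tcf_classify_ingress(skb, block, tp, &res, false);
	/* ret == TC_ACT_UNSPEC, skb now carries ext->chain == 42 */

	/* pass 2: finds the extension, deletes it, and restarts from
	 * chain 42's filter list via tcf_chain_lookup_rcu()
	 */
	ret = tcf_classify_ingress(skb, block, tp, &res, false);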
 struct tcf_chain_info {
 	struct tcf_proto __rcu **pprev;
 	struct tcf_proto __rcu *next;
 };

-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info)
 {
-	return rtnl_dereference(*chain_info->pprev);
+	return tcf_chain_dereference(*chain_info->pprev, chain);
 }

-static void tcf_chain_tp_insert(struct tcf_chain *chain,
-				struct tcf_chain_info *chain_info,
-				struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+			       struct tcf_chain_info *chain_info,
+			       struct tcf_proto *tp)
 {
+	if (chain->flushing)
+		return -EAGAIN;
+
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
-	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+	tcf_proto_get(tp);
 	rcu_assign_pointer(*chain_info->pprev, tp);
-	tcf_chain_hold(chain);
+
+	return 0;
 }

 static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
 {
-	struct tcf_proto *next = rtnl_dereference(chain_info->next);
+	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);

+	tcf_proto_mark_delete(tp);
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
-	tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
+					   bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+						    struct tcf_proto *tp_new,
+						    u32 protocol, u32 prio,
+						    bool rtnl_held)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp;
+	int err = 0;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (tcf_proto_exists_destroying(chain, tp_new)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	tp = tcf_chain_tp_find(chain, &chain_info,
+			       protocol, prio, false);
+	if (!tp)
+		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	if (tp) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = tp;
+	} else if (err) {
+		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+		tp_new = ERR_PTR(err);
+	}
+
+	return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+				      struct tcf_proto *tp, bool rtnl_held,
+				      struct netlink_ext_ack *extack)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp_iter;
+	struct tcf_proto **pprev;
+	struct tcf_proto *next;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	/* Atomically find and remove tp from chain. */
+	for (pprev = &chain->filter_chain;
+	     (tp_iter = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp_iter->next) {
+		if (tp_iter == tp) {
+			chain_info.pprev = pprev;
+			chain_info.next = tp_iter->next;
+			WARN_ON(tp_iter->deleting);
+			break;
+		}
+	}
+	/* Verify that tp still exists and no new filters were inserted
+	 * concurrently.
+	 * Mark tp for deletion if it is empty.
+	 */
+	if (!tp_iter || !tcf_proto_check_delete(tp)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		return;
+	}
+
+	tcf_proto_signal_destroying(chain, tp);
+	next = tcf_chain_dereference(chain_info.next, chain);
+	if (tp == chain->filter_chain)
+		tcf_chain0_head_change(chain, next);
+	RCU_INIT_POINTER(*chain_info.pprev, next);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	tcf_proto_put(tp, rtnl_held, extack);
 }

 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
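Note: tcf_chain_tp_insert_unique() above is the insert-side counterpart of the destroy-signaling hashtable from the top of this patch. Under filter_chain_lock it first refuses with -EAGAIN while an identical instance is still dying, then either links tp_new or returns the concurrently inserted twin, discarding the redundant allocation. From a caller's perspective (simplified sketch):

	tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
					rtnl_held);
	if (IS_ERR(tp))
		/* -EAGAIN means the request should simply be replayed */
		return PTR_ERR(tp);
	/* tp is a live, referenced proto: either tp_new or an
	 * already-present instance with the same (prio, protocol)
	 */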
@@ -1091,7 +1772,8 @@

 	/* Check the chain for existence of proto-tcf with this priority */
 	for (pprev = &chain->filter_chain;
-	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+	     (tp = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
 				if (prio_allocate ||
@@ -1104,14 +1786,20 @@
 		}
 	}
 	chain_info->pprev = pprev;
-	chain_info->next = tp ? tp->next : NULL;
+	if (tp) {
+		chain_info->next = tp->next;
+		tcf_proto_get(tp);
+	} else {
+		chain_info->next = NULL;
+	}
 	return tp;
 }

 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
-			 u32 portid, u32 seq, u16 flags, int event)
+			 u32 portid, u32 seq, u16 flags, int event,
+			 bool terse_dump, bool rtnl_held)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr *nlh;
@@ -1138,8 +1826,17 @@
 		goto nla_put_failure;
 	if (!fh) {
 		tcm->tcm_handle = 0;
+	} else if (terse_dump) {
+		if (tp->ops->terse_dump) {
+			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
+						rtnl_held) < 0)
+				goto nla_put_failure;
+		} else {
+			goto cls_op_not_supp;
+		}
 	} else {
-		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+		if (tp->ops->dump &&
+		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
 			goto nla_put_failure;
 	}
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
....@@ -1147,6 +1844,7 @@
11471844
11481845 out_nlmsg_trim:
11491846 nla_put_failure:
1847
+cls_op_not_supp:
11501848 nlmsg_trim(skb, b);
11511849 return -1;
11521850 }
....@@ -1154,33 +1852,40 @@
11541852 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
11551853 struct nlmsghdr *n, struct tcf_proto *tp,
11561854 struct tcf_block *block, struct Qdisc *q,
1157
- u32 parent, void *fh, int event, bool unicast)
1855
+ u32 parent, void *fh, int event, bool unicast,
1856
+ bool rtnl_held)
11581857 {
11591858 struct sk_buff *skb;
11601859 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1860
+ int err = 0;
11611861
11621862 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
11631863 if (!skb)
11641864 return -ENOBUFS;
11651865
11661866 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1167
- n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
1867
+ n->nlmsg_seq, n->nlmsg_flags, event,
1868
+ false, rtnl_held) <= 0) {
11681869 kfree_skb(skb);
11691870 return -EINVAL;
11701871 }
11711872
11721873 if (unicast)
1173
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1874
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1875
+ else
1876
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1877
+ n->nlmsg_flags & NLM_F_ECHO);
11741878
1175
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1176
- n->nlmsg_flags & NLM_F_ECHO);
1879
+ if (err > 0)
1880
+ err = 0;
1881
+ return err;
11771882 }
11781883
11791884 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
11801885 struct nlmsghdr *n, struct tcf_proto *tp,
11811886 struct tcf_block *block, struct Qdisc *q,
11821887 u32 parent, void *fh, bool unicast, bool *last,
1183
- struct netlink_ext_ack *extack)
1888
+ bool rtnl_held, struct netlink_ext_ack *extack)
11841889 {
11851890 struct sk_buff *skb;
11861891 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
....@@ -1191,39 +1896,50 @@
11911896 return -ENOBUFS;
11921897
11931898 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1194
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
1899
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1900
+ false, rtnl_held) <= 0) {
11951901 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
11961902 kfree_skb(skb);
11971903 return -EINVAL;
11981904 }
11991905
1200
- err = tp->ops->delete(tp, fh, last, extack);
1906
+ err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
12011907 if (err) {
12021908 kfree_skb(skb);
12031909 return err;
12041910 }
12051911
12061912 if (unicast)
1207
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1208
-
1209
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1210
- n->nlmsg_flags & NLM_F_ECHO);
1913
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1914
+ else
1915
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1916
+ n->nlmsg_flags & NLM_F_ECHO);
12111917 if (err < 0)
12121918 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1919
+
1920
+ if (err > 0)
1921
+ err = 0;
12131922 return err;
12141923 }
12151924
12161925 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
12171926 struct tcf_block *block, struct Qdisc *q,
12181927 u32 parent, struct nlmsghdr *n,
1219
- struct tcf_chain *chain, int event)
1928
+ struct tcf_chain *chain, int event,
1929
+ bool rtnl_held)
12201930 {
12211931 struct tcf_proto *tp;
12221932
1223
- for (tp = rtnl_dereference(chain->filter_chain);
1224
- tp; tp = rtnl_dereference(tp->next))
1933
+ for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1934
+ tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
12251935 tfilter_notify(net, oskb, n, tp, block,
1226
- q, parent, NULL, event, false);
1936
+ q, parent, NULL, event, false, rtnl_held);
1937
+}
1938
+
1939
+static void tfilter_put(struct tcf_proto *tp, void *fh)
1940
+{
1941
+ if (tp->ops->put && fh)
1942
+ tp->ops->put(tp, fh);
12271943 }
12281944
12291945 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1231,21 +1947,23 @@
12311947 {
12321948 struct net *net = sock_net(skb->sk);
12331949 struct nlattr *tca[TCA_MAX + 1];
1950
+ char name[IFNAMSIZ];
12341951 struct tcmsg *t;
12351952 u32 protocol;
12361953 u32 prio;
12371954 bool prio_allocate;
12381955 u32 parent;
12391956 u32 chain_index;
1240
- struct Qdisc *q = NULL;
1957
+ struct Qdisc *q;
12411958 struct tcf_chain_info chain_info;
1242
- struct tcf_chain *chain = NULL;
1959
+ struct tcf_chain *chain;
12431960 struct tcf_block *block;
12441961 struct tcf_proto *tp;
12451962 unsigned long cl;
12461963 void *fh;
12471964 int err;
12481965 int tp_created;
1966
+ bool rtnl_held = false;
12491967
12501968 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
12511969 return -EPERM;
....@@ -1253,7 +1971,8 @@
12531971 replay:
12541972 tp_created = 0;
12551973
1256
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1974
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1975
+ rtm_tca_policy, extack);
12571976 if (err < 0)
12581977 return err;
12591978
....@@ -1262,7 +1981,11 @@
12621981 prio = TC_H_MAJ(t->tcm_info);
12631982 prio_allocate = false;
12641983 parent = t->tcm_parent;
1984
+ tp = NULL;
12651985 cl = 0;
1986
+ block = NULL;
1987
+ q = NULL;
1988
+ chain = NULL;
12661989
12671990 if (prio == 0) {
12681991 /* If no priority is provided by the user,
....@@ -1279,12 +2002,38 @@
12792002
12802003 /* Find head of filter chain. */
12812004
1282
- block = tcf_block_find(net, &q, &parent, &cl,
1283
- t->tcm_ifindex, t->tcm_block_index, extack);
2005
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2006
+ if (err)
2007
+ return err;
2008
+
2009
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2010
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2011
+ err = -EINVAL;
2012
+ goto errout;
2013
+ }
2014
+
2015
+ /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
2016
+ * block is shared (no qdisc found), qdisc is not unlocked, classifier
2017
+ * type is not specified, classifier is not unlocked.
2018
+ */
2019
+ if (rtnl_held ||
2020
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2021
+ !tcf_proto_is_unlocked(name)) {
2022
+ rtnl_held = true;
2023
+ rtnl_lock();
2024
+ }
2025
+
2026
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2027
+ if (err)
2028
+ goto errout;
2029
+
2030
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2031
+ extack);
12842032 if (IS_ERR(block)) {
12852033 err = PTR_ERR(block);
12862034 goto errout;
12872035 }
2036
+ block->classid = parent;
12882037
12892038 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
12902039 if (chain_index > TC_ACT_EXT_VAL_MASK) {
....@@ -1299,40 +2048,61 @@
12992048 goto errout;
13002049 }
13012050
2051
+ mutex_lock(&chain->filter_chain_lock);
13022052 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
13032053 prio, prio_allocate);
13042054 if (IS_ERR(tp)) {
13052055 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
13062056 err = PTR_ERR(tp);
1307
- goto errout;
2057
+ goto errout_locked;
13082058 }
13092059
13102060 if (tp == NULL) {
2061
+ struct tcf_proto *tp_new = NULL;
2062
+
2063
+ if (chain->flushing) {
2064
+ err = -EAGAIN;
2065
+ goto errout_locked;
2066
+ }
2067
+
13112068 /* Proto-tcf does not exist, create new one */
13122069
13132070 if (tca[TCA_KIND] == NULL || !protocol) {
13142071 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
13152072 err = -EINVAL;
1316
- goto errout;
2073
+ goto errout_locked;
13172074 }
13182075
13192076 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
13202077 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
13212078 err = -ENOENT;
1322
- goto errout;
2079
+ goto errout_locked;
13232080 }
13242081
13252082 if (prio_allocate)
1326
- prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
2083
+ prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2084
+ &chain_info));
13272085
1328
- tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
1329
- protocol, prio, chain, extack);
2086
+ mutex_unlock(&chain->filter_chain_lock);
2087
+ tp_new = tcf_proto_create(name, protocol, prio, chain,
2088
+ rtnl_held, extack);
2089
+ if (IS_ERR(tp_new)) {
2090
+ err = PTR_ERR(tp_new);
2091
+ goto errout_tp;
2092
+ }
2093
+
2094
+ tp_created = 1;
2095
+ tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2096
+ rtnl_held);
13302097 if (IS_ERR(tp)) {
13312098 err = PTR_ERR(tp);
1332
- goto errout;
2099
+ goto errout_tp;
13332100 }
1334
- tp_created = 1;
1335
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2101
+ } else {
2102
+ mutex_unlock(&chain->filter_chain_lock);
2103
+ }
2104
+
2105
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
13362106 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
13372107 err = -EINVAL;
13382108 goto errout;
....@@ -1347,12 +2117,14 @@
13472117 goto errout;
13482118 }
13492119 } else if (n->nlmsg_flags & NLM_F_EXCL) {
2120
+ tfilter_put(tp, fh);
13502121 NL_SET_ERR_MSG(extack, "Filter already exists");
13512122 err = -EEXIST;
13522123 goto errout;
13532124 }
13542125
13552126 if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2127
+ tfilter_put(tp, fh);
13562128 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
13572129 err = -EINVAL;
13582130 goto errout;
....@@ -1360,28 +2132,44 @@
13602132
13612133 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
13622134 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
1363
- extack);
2135
+ rtnl_held, extack);
13642136 if (err == 0) {
1365
- if (tp_created)
1366
- tcf_chain_tp_insert(chain, &chain_info, tp);
13672137 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1368
- RTM_NEWTFILTER, false);
2138
+ RTM_NEWTFILTER, false, rtnl_held);
2139
+ tfilter_put(tp, fh);
13692140 /* q pointer is NULL for shared blocks */
13702141 if (q)
13712142 q->flags &= ~TCQ_F_CAN_BYPASS;
1372
- } else {
1373
- if (tp_created)
1374
- tcf_proto_destroy(tp, NULL);
13752143 }
13762144
13772145 errout:
1378
- if (chain)
1379
- tcf_chain_put(chain);
1380
- tcf_block_release(q, block);
1381
- if (err == -EAGAIN)
2146
+ if (err && tp_created)
2147
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2148
+errout_tp:
2149
+ if (chain) {
2150
+ if (tp && !IS_ERR(tp))
2151
+ tcf_proto_put(tp, rtnl_held, NULL);
2152
+ if (!tp_created)
2153
+ tcf_chain_put(chain);
2154
+ }
2155
+ tcf_block_release(q, block, rtnl_held);
2156
+
2157
+ if (rtnl_held)
2158
+ rtnl_unlock();
2159
+
2160
+ if (err == -EAGAIN) {
2161
+ /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2162
+ * of target chain.
2163
+ */
2164
+ rtnl_held = true;
13822165 /* Replay the request. */
13832166 goto replay;
2167
+ }
13842168 return err;
2169
+
2170
+errout_locked:
2171
+ mutex_unlock(&chain->filter_chain_lock);
2172
+ goto errout;
13852173 }
13862174
13872175 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1389,6 +2177,7 @@
13892177 {
13902178 struct net *net = sock_net(skb->sk);
13912179 struct nlattr *tca[TCA_MAX + 1];
2180
+ char name[IFNAMSIZ];
13922181 struct tcmsg *t;
13932182 u32 protocol;
13942183 u32 prio;
....@@ -1397,16 +2186,18 @@
13972186 struct Qdisc *q = NULL;
13982187 struct tcf_chain_info chain_info;
13992188 struct tcf_chain *chain = NULL;
1400
- struct tcf_block *block;
2189
+ struct tcf_block *block = NULL;
14012190 struct tcf_proto *tp = NULL;
14022191 unsigned long cl = 0;
14032192 void *fh = NULL;
14042193 int err;
2194
+ bool rtnl_held = false;
14052195
14062196 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
14072197 return -EPERM;
14082198
1409
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2199
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2200
+ rtm_tca_policy, extack);
14102201 if (err < 0)
14112202 return err;
14122203
....@@ -1422,8 +2213,32 @@
14222213
14232214 /* Find head of filter chain. */
14242215
1425
- block = tcf_block_find(net, &q, &parent, &cl,
1426
- t->tcm_ifindex, t->tcm_block_index, extack);
2216
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2217
+ if (err)
2218
+ return err;
2219
+
2220
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2221
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2222
+ err = -EINVAL;
2223
+ goto errout;
2224
+ }
2225
+ /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
2226
+ * found), qdisc is not unlocked, classifier type is not specified,
2227
+ * classifier is not unlocked.
2228
+ */
2229
+ if (!prio ||
2230
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2231
+ !tcf_proto_is_unlocked(name)) {
2232
+ rtnl_held = true;
2233
+ rtnl_lock();
2234
+ }
2235
+
2236
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2237
+ if (err)
2238
+ goto errout;
2239
+
2240
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2241
+ extack);
14272242 if (IS_ERR(block)) {
14282243 err = PTR_ERR(block);
14292244 goto errout;
....@@ -1451,56 +2266,70 @@
14512266
14522267 if (prio == 0) {
14532268 tfilter_notify_chain(net, skb, block, q, parent, n,
1454
- chain, RTM_DELTFILTER);
1455
- tcf_chain_flush(chain);
2269
+ chain, RTM_DELTFILTER, rtnl_held);
2270
+ tcf_chain_flush(chain, rtnl_held);
14562271 err = 0;
14572272 goto errout;
14582273 }
14592274
2275
+ mutex_lock(&chain->filter_chain_lock);
14602276 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
14612277 prio, false);
14622278 if (!tp || IS_ERR(tp)) {
14632279 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
14642280 err = tp ? PTR_ERR(tp) : -ENOENT;
1465
- goto errout;
2281
+ goto errout_locked;
14662282 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
14672283 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
14682284 err = -EINVAL;
2285
+ goto errout_locked;
2286
+ } else if (t->tcm_handle == 0) {
2287
+ tcf_proto_signal_destroying(chain, tp);
2288
+ tcf_chain_tp_remove(chain, &chain_info, tp);
2289
+ mutex_unlock(&chain->filter_chain_lock);
2290
+
2291
+ tcf_proto_put(tp, rtnl_held, NULL);
2292
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2293
+ RTM_DELTFILTER, false, rtnl_held);
2294
+ err = 0;
14692295 goto errout;
14702296 }
2297
+ mutex_unlock(&chain->filter_chain_lock);
14712298
14722299 fh = tp->ops->get(tp, t->tcm_handle);
14732300
14742301 if (!fh) {
1475
- if (t->tcm_handle == 0) {
1476
- tcf_chain_tp_remove(chain, &chain_info, tp);
1477
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1478
- RTM_DELTFILTER, false);
1479
- tcf_proto_destroy(tp, extack);
1480
- err = 0;
1481
- } else {
1482
- NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1483
- err = -ENOENT;
1484
- }
2302
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2303
+ err = -ENOENT;
14852304 } else {
14862305 bool last;
14872306
14882307 err = tfilter_del_notify(net, skb, n, tp, block,
14892308 q, parent, fh, false, &last,
1490
- extack);
2309
+ rtnl_held, extack);
2310
+
14912311 if (err)
14922312 goto errout;
1493
- if (last) {
1494
- tcf_chain_tp_remove(chain, &chain_info, tp);
1495
- tcf_proto_destroy(tp, extack);
1496
- }
2313
+ if (last)
2314
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
14972315 }
14982316
14992317 errout:
1500
- if (chain)
2318
+ if (chain) {
2319
+ if (tp && !IS_ERR(tp))
2320
+ tcf_proto_put(tp, rtnl_held, NULL);
15012321 tcf_chain_put(chain);
1502
- tcf_block_release(q, block);
2322
+ }
2323
+ tcf_block_release(q, block, rtnl_held);
2324
+
2325
+ if (rtnl_held)
2326
+ rtnl_unlock();
2327
+
15032328 return err;
2329
+
2330
+errout_locked:
2331
+ mutex_unlock(&chain->filter_chain_lock);
2332
+ goto errout;
15042333 }
15052334
15062335 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
....@@ -1508,6 +2337,7 @@
15082337 {
15092338 struct net *net = sock_net(skb->sk);
15102339 struct nlattr *tca[TCA_MAX + 1];
2340
+ char name[IFNAMSIZ];
15112341 struct tcmsg *t;
15122342 u32 protocol;
15132343 u32 prio;
....@@ -1516,13 +2346,15 @@
15162346 struct Qdisc *q = NULL;
15172347 struct tcf_chain_info chain_info;
15182348 struct tcf_chain *chain = NULL;
1519
- struct tcf_block *block;
2349
+ struct tcf_block *block = NULL;
15202350 struct tcf_proto *tp = NULL;
15212351 unsigned long cl = 0;
15222352 void *fh = NULL;
15232353 int err;
2354
+ bool rtnl_held = false;
15242355
1525
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2356
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2357
+ rtm_tca_policy, extack);
15262358 if (err < 0)
15272359 return err;
15282360
....@@ -1538,8 +2370,31 @@
15382370
15392371 /* Find head of filter chain. */
15402372
1541
- block = tcf_block_find(net, &q, &parent, &cl,
1542
- t->tcm_ifindex, t->tcm_block_index, extack);
2373
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2374
+ if (err)
2375
+ return err;
2376
+
2377
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2378
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2379
+ err = -EINVAL;
2380
+ goto errout;
2381
+ }
2382
+ /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
2383
+ * unlocked, classifier type is not specified, classifier is not
2384
+ * unlocked.
2385
+ */
2386
+ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2387
+ !tcf_proto_is_unlocked(name)) {
2388
+ rtnl_held = true;
2389
+ rtnl_lock();
2390
+ }
2391
+
2392
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2393
+ if (err)
2394
+ goto errout;
2395
+
2396
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2397
+ extack);
15432398 if (IS_ERR(block)) {
15442399 err = PTR_ERR(block);
15452400 goto errout;
....@@ -1558,8 +2413,10 @@
15582413 goto errout;
15592414 }
15602415
2416
+ mutex_lock(&chain->filter_chain_lock);
15612417 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
15622418 prio, false);
2419
+ mutex_unlock(&chain->filter_chain_lock);
15632420 if (!tp || IS_ERR(tp)) {
15642421 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
15652422 err = tp ? PTR_ERR(tp) : -ENOENT;
....@@ -1577,15 +2434,23 @@
15772434 err = -ENOENT;
15782435 } else {
15792436 err = tfilter_notify(net, skb, n, tp, block, q, parent,
1580
- fh, RTM_NEWTFILTER, true);
2437
+ fh, RTM_NEWTFILTER, true, rtnl_held);
15812438 if (err < 0)
15822439 NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
15832440 }
15842441
2442
+ tfilter_put(tp, fh);
15852443 errout:
1586
- if (chain)
2444
+ if (chain) {
2445
+ if (tp && !IS_ERR(tp))
2446
+ tcf_proto_put(tp, rtnl_held, NULL);
15872447 tcf_chain_put(chain);
1588
- tcf_block_release(q, block);
2448
+ }
2449
+ tcf_block_release(q, block, rtnl_held);
2450
+
2451
+ if (rtnl_held)
2452
+ rtnl_unlock();
2453
+
15892454 return err;
15902455 }
15912456
....@@ -1596,6 +2461,7 @@
15962461 struct tcf_block *block;
15972462 struct Qdisc *q;
15982463 u32 parent;
2464
+ bool terse_dump;
15992465 };
16002466
16012467 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
....@@ -1606,21 +2472,25 @@
16062472 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
16072473 n, NETLINK_CB(a->cb->skb).portid,
16082474 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1609
- RTM_NEWTFILTER);
2475
+ RTM_NEWTFILTER, a->terse_dump, true);
16102476 }
16112477
16122478 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
16132479 struct sk_buff *skb, struct netlink_callback *cb,
1614
- long index_start, long *p_index)
2480
+ long index_start, long *p_index, bool terse)
16152481 {
16162482 struct net *net = sock_net(skb->sk);
16172483 struct tcf_block *block = chain->block;
16182484 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2485
+ struct tcf_proto *tp, *tp_prev;
16192486 struct tcf_dump_args arg;
1620
- struct tcf_proto *tp;
16212487
1622
- for (tp = rtnl_dereference(chain->filter_chain);
1623
- tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
2488
+ for (tp = __tcf_get_next_proto(chain, NULL);
2489
+ tp;
2490
+ tp_prev = tp,
2491
+ tp = __tcf_get_next_proto(chain, tp),
2492
+ tcf_proto_put(tp_prev, true, NULL),
2493
+ (*p_index)++) {
16242494 if (*p_index < index_start)
16252495 continue;
16262496 if (TC_H_MAJ(tcm->tcm_info) &&
....@@ -1636,9 +2506,8 @@
16362506 if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
16372507 NETLINK_CB(cb->skb).portid,
16382508 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1639
- RTM_NEWTFILTER) <= 0)
1640
- return false;
1641
-
2509
+ RTM_NEWTFILTER, false, true) <= 0)
2510
+ goto errout;
16422511 cb->args[1] = 1;
16432512 }
16442513 if (!tp->ops->walk)
....@@ -1653,24 +2522,34 @@
16532522 arg.w.skip = cb->args[1] - 1;
16542523 arg.w.count = 0;
16552524 arg.w.cookie = cb->args[2];
1656
- tp->ops->walk(tp, &arg.w);
2525
+ arg.terse_dump = terse;
2526
+ tp->ops->walk(tp, &arg.w, true);
16572527 cb->args[2] = arg.w.cookie;
16582528 cb->args[1] = arg.w.count + 1;
16592529 if (arg.w.stop)
1660
- return false;
2530
+ goto errout;
16612531 }
16622532 return true;
2533
+
2534
+errout:
2535
+ tcf_proto_put(tp, true, NULL);
2536
+ return false;
16632537 }
2538
+
2539
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
2540
+ [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
2541
+};
16642542
16652543 /* called with RTNL */
16662544 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
16672545 {
2546
+ struct tcf_chain *chain, *chain_prev;
16682547 struct net *net = sock_net(skb->sk);
16692548 struct nlattr *tca[TCA_MAX + 1];
16702549 struct Qdisc *q = NULL;
16712550 struct tcf_block *block;
1672
- struct tcf_chain *chain;
16732551 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2552
+ bool terse_dump = false;
16742553 long index_start;
16752554 long index;
16762555 u32 parent;
....@@ -1679,12 +2558,20 @@
16792558 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
16802559 return skb->len;
16812560
1682
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
2561
+ err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2562
+ tcf_tfilter_dump_policy, cb->extack);
16832563 if (err)
16842564 return err;
16852565
2566
+ if (tca[TCA_DUMP_FLAGS]) {
2567
+ struct nla_bitfield32 flags =
2568
+ nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
2569
+
2570
+ terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
2571
+ }
2572
+
16862573 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1687
- block = tcf_block_lookup(net, tcm->tcm_block_index);
2574
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
16882575 if (!block)
16892576 goto out;
16902577 /* If we work with block index, q is NULL and parent value
....@@ -1704,12 +2591,10 @@
17042591 return skb->len;
17052592
17062593 parent = tcm->tcm_parent;
1707
- if (!parent) {
1708
- q = dev->qdisc;
1709
- parent = q->handle;
1710
- } else {
2594
+ if (!parent)
2595
+ q = rtnl_dereference(dev->qdisc);
2596
+ else
17112597 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1712
- }
17132598 if (!q)
17142599 goto out;
17152600 cops = q->ops->cl_ops;
....@@ -1725,6 +2610,7 @@
17252610 block = cops->tcf_block(q, cl, NULL);
17262611 if (!block)
17272612 goto out;
2613
+ parent = block->classid;
17282614 if (tcf_block_shared(block))
17292615 q = NULL;
17302616 }
....@@ -1732,17 +2618,24 @@
17322618 index_start = cb->args[0];
17332619 index = 0;
17342620
1735
- list_for_each_entry(chain, &block->chain_list, list) {
2621
+ for (chain = __tcf_get_next_chain(block, NULL);
2622
+ chain;
2623
+ chain_prev = chain,
2624
+ chain = __tcf_get_next_chain(block, chain),
2625
+ tcf_chain_put(chain_prev)) {
17362626 if (tca[TCA_CHAIN] &&
17372627 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
17382628 continue;
17392629 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1740
- index_start, &index)) {
2630
+ index_start, &index, terse_dump)) {
2631
+ tcf_chain_put(chain);
17412632 err = -EMSGSIZE;
17422633 break;
17432634 }
17442635 }
17452636
2637
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2638
+ tcf_block_refcnt_put(block, true);
17462639 cb->args[0] = index;
17472640
17482641 out:
....@@ -1752,8 +2645,10 @@
17522645 return skb->len;
17532646 }
17542647
1755
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
1756
- struct sk_buff *skb, struct tcf_block *block,
2648
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2649
+ void *tmplt_priv, u32 chain_index,
2650
+ struct net *net, struct sk_buff *skb,
2651
+ struct tcf_block *block,
17572652 u32 portid, u32 seq, u16 flags, int event)
17582653 {
17592654 unsigned char *b = skb_tail_pointer(skb);
....@@ -1762,8 +2657,8 @@
17622657 struct tcmsg *tcm;
17632658 void *priv;
17642659
1765
- ops = chain->tmplt_ops;
1766
- priv = chain->tmplt_priv;
2660
+ ops = tmplt_ops;
2661
+ priv = tmplt_priv;
17672662
17682663 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
17692664 if (!nlh)
....@@ -1781,7 +2676,7 @@
17812676 tcm->tcm_block_index = block->index;
17822677 }
17832678
1784
- if (nla_put_u32(skb, TCA_CHAIN, chain->index))
2679
+ if (nla_put_u32(skb, TCA_CHAIN, chain_index))
17852680 goto nla_put_failure;
17862681
17872682 if (ops) {
....@@ -1807,13 +2702,45 @@
18072702 struct tcf_block *block = chain->block;
18082703 struct net *net = block->net;
18092704 struct sk_buff *skb;
2705
+ int err = 0;
18102706
18112707 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
18122708 if (!skb)
18132709 return -ENOBUFS;
18142710
1815
- if (tc_chain_fill_node(chain, net, skb, block, portid,
2711
+ if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2712
+ chain->index, net, skb, block, portid,
18162713 seq, flags, event) <= 0) {
2714
+ kfree_skb(skb);
2715
+ return -EINVAL;
2716
+ }
2717
+
2718
+ if (unicast)
2719
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2720
+ else
2721
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2722
+ flags & NLM_F_ECHO);
2723
+
2724
+ if (err > 0)
2725
+ err = 0;
2726
+ return err;
2727
+}
2728
+
2729
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2730
+ void *tmplt_priv, u32 chain_index,
2731
+ struct tcf_block *block, struct sk_buff *oskb,
2732
+ u32 seq, u16 flags, bool unicast)
2733
+{
2734
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2735
+ struct net *net = block->net;
2736
+ struct sk_buff *skb;
2737
+
2738
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2739
+ if (!skb)
2740
+ return -ENOBUFS;
2741
+
2742
+ if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2743
+ block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
18172744 kfree_skb(skb);
18182745 return -EINVAL;
18192746 }
....@@ -1829,13 +2756,19 @@
18292756 struct netlink_ext_ack *extack)
18302757 {
18312758 const struct tcf_proto_ops *ops;
2759
+ char name[IFNAMSIZ];
18322760 void *tmplt_priv;
18332761
18342762 /* If kind is not set, user did not specify template. */
18352763 if (!tca[TCA_KIND])
18362764 return 0;
18372765
1838
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
2766
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2767
+ NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2768
+ return -EINVAL;
2769
+ }
2770
+
2771
+ ops = tcf_proto_lookup_ops(name, true, extack);
18392772 if (IS_ERR(ops))
18402773 return PTR_ERR(ops);
18412774 if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
....@@ -1853,16 +2786,15 @@
18532786 return 0;
18542787 }
18552788
1856
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
2789
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2790
+ void *tmplt_priv)
18572791 {
1858
- const struct tcf_proto_ops *ops = chain->tmplt_ops;
1859
-
18602792 /* If template ops are set, no work to do for us. */
1861
- if (!ops)
2793
+ if (!tmplt_ops)
18622794 return;
18632795
1864
- ops->tmplt_destroy(chain->tmplt_priv);
1865
- module_put(ops->owner);
2796
+ tmplt_ops->tmplt_destroy(tmplt_priv);
2797
+ module_put(tmplt_ops->owner);
18662798 }
18672799
18682800 /* Add/delete/get a chain */
....@@ -1875,8 +2807,8 @@
18752807 struct tcmsg *t;
18762808 u32 parent;
18772809 u32 chain_index;
1878
- struct Qdisc *q = NULL;
1879
- struct tcf_chain *chain = NULL;
2810
+ struct Qdisc *q;
2811
+ struct tcf_chain *chain;
18802812 struct tcf_block *block;
18812813 unsigned long cl;
18822814 int err;
....@@ -1886,7 +2818,9 @@
18862818 return -EPERM;
18872819
18882820 replay:
1889
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2821
+ q = NULL;
2822
+ err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2823
+ rtm_tca_policy, extack);
18902824 if (err < 0)
18912825 return err;
18922826
....@@ -1905,6 +2839,8 @@
19052839 err = -EINVAL;
19062840 goto errout_block;
19072841 }
2842
+
2843
+ mutex_lock(&block->lock);
19082844 chain = tcf_chain_lookup(block, chain_index);
19092845 if (n->nlmsg_type == RTM_NEWCHAIN) {
19102846 if (chain) {
....@@ -1916,54 +2852,61 @@
19162852 } else {
19172853 NL_SET_ERR_MSG(extack, "Filter chain already exists");
19182854 err = -EEXIST;
1919
- goto errout_block;
2855
+ goto errout_block_locked;
19202856 }
19212857 } else {
19222858 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
19232859 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
19242860 err = -ENOENT;
1925
- goto errout_block;
2861
+ goto errout_block_locked;
19262862 }
19272863 chain = tcf_chain_create(block, chain_index);
19282864 if (!chain) {
19292865 NL_SET_ERR_MSG(extack, "Failed to create filter chain");
19302866 err = -ENOMEM;
1931
- goto errout_block;
2867
+ goto errout_block_locked;
19322868 }
19332869 }
19342870 } else {
19352871 if (!chain || tcf_chain_held_by_acts_only(chain)) {
19362872 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
19372873 err = -EINVAL;
1938
- goto errout_block;
2874
+ goto errout_block_locked;
19392875 }
19402876 tcf_chain_hold(chain);
19412877 }
19422878
1943
- switch (n->nlmsg_type) {
1944
- case RTM_NEWCHAIN:
1945
- err = tc_chain_tmplt_add(chain, net, tca, extack);
1946
- if (err)
1947
- goto errout;
1948
- /* In case the chain was successfully added, take a reference
1949
- * to the chain. This ensures that an empty chain
1950
- * does not disappear at the end of this function.
2879
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
2880
+ /* Modifying chain requires holding parent block lock. In case
2881
+ * the chain was successfully added, take a reference to the
2882
+ * chain. This ensures that an empty chain does not disappear at
2883
+ * the end of this function.
19512884 */
19522885 tcf_chain_hold(chain);
19532886 chain->explicitly_created = true;
2887
+ }
2888
+ mutex_unlock(&block->lock);
2889
+
2890
+ switch (n->nlmsg_type) {
2891
+ case RTM_NEWCHAIN:
2892
+ err = tc_chain_tmplt_add(chain, net, tca, extack);
2893
+ if (err) {
2894
+ tcf_chain_put_explicitly_created(chain);
2895
+ goto errout;
2896
+ }
2897
+
19542898 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
19552899 RTM_NEWCHAIN, false);
19562900 break;
19572901 case RTM_DELCHAIN:
19582902 tfilter_notify_chain(net, skb, block, q, parent, n,
1959
- chain, RTM_DELTFILTER);
2903
+ chain, RTM_DELTFILTER, true);
19602904 /* Flush the chain first as the user requested chain removal. */
1961
- tcf_chain_flush(chain);
2905
+ tcf_chain_flush(chain, true);
19622906 /* In case the chain was successfully deleted, put a reference
19632907 * to the chain previously taken during addition.
19642908 */
19652909 tcf_chain_put_explicitly_created(chain);
1966
- chain->explicitly_created = false;
19672910 break;
19682911 case RTM_GETCHAIN:
19692912 err = tc_chain_notify(chain, skb, n->nlmsg_seq,
....@@ -1980,11 +2923,15 @@
19802923 errout:
19812924 tcf_chain_put(chain);
19822925 errout_block:
1983
- tcf_block_release(q, block);
2926
+ tcf_block_release(q, block, true);
19842927 if (err == -EAGAIN)
19852928 /* Replay the request. */
19862929 goto replay;
19872930 return err;
2931
+
2932
+errout_block_locked:
2933
+ mutex_unlock(&block->lock);
2934
+ goto errout_block;
19882935 }
19892936
19902937 /* called with RTNL */
....@@ -1994,8 +2941,8 @@
19942941 struct nlattr *tca[TCA_MAX + 1];
19952942 struct Qdisc *q = NULL;
19962943 struct tcf_block *block;
1997
- struct tcf_chain *chain;
19982944 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2945
+ struct tcf_chain *chain;
19992946 long index_start;
20002947 long index;
20012948 u32 parent;
....@@ -2004,13 +2951,13 @@
20042951 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
20052952 return skb->len;
20062953
2007
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
2008
- NULL);
2954
+ err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2955
+ rtm_tca_policy, cb->extack);
20092956 if (err)
20102957 return err;
20112958
20122959 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2013
- block = tcf_block_lookup(net, tcm->tcm_block_index);
2960
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
20142961 if (!block)
20152962 goto out;
20162963 /* If we work with block index, q is NULL and parent value
....@@ -2031,7 +2978,7 @@
20312978
20322979 parent = tcm->tcm_parent;
20332980 if (!parent) {
2034
- q = dev->qdisc;
2981
+ q = rtnl_dereference(dev->qdisc);
20352982 parent = q->handle;
20362983 } else {
20372984 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
....@@ -2058,6 +3005,7 @@
20583005 index_start = cb->args[0];
20593006 index = 0;
20603007
3008
+ mutex_lock(&block->lock);
20613009 list_for_each_entry(chain, &block->chain_list, list) {
20623010 if ((tca[TCA_CHAIN] &&
20633011 nla_get_u32(tca[TCA_CHAIN]) != chain->index))
....@@ -2068,7 +3016,8 @@
20683016 }
20693017 if (tcf_chain_held_by_acts_only(chain))
20703018 continue;
2071
- err = tc_chain_fill_node(chain, net, skb, block,
3019
+ err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
3020
+ chain->index, net, skb, block,
20723021 NETLINK_CB(cb->skb).portid,
20733022 cb->nlh->nlmsg_seq, NLM_F_MULTI,
20743023 RTM_NEWCHAIN);
....@@ -2076,7 +3025,10 @@
20763025 break;
20773026 index++;
20783027 }
3028
+ mutex_unlock(&block->lock);
20793029
3030
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3031
+ tcf_block_refcnt_put(block, true);
20803032 cb->args[0] = index;
20813033
20823034 out:
....@@ -2100,35 +3052,43 @@
21003052
21013053 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
21023054 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
2103
- struct netlink_ext_ack *extack)
3055
+ bool rtnl_held, struct netlink_ext_ack *extack)
21043056 {
21053057 #ifdef CONFIG_NET_CLS_ACT
21063058 {
3059
+ int init_res[TCA_ACT_MAX_PRIO] = {};
21073060 struct tc_action *act;
21083061 size_t attr_size = 0;
21093062
21103063 if (exts->police && tb[exts->police]) {
3064
+ struct tc_action_ops *a_o;
3065
+
3066
+ a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
3067
+ if (IS_ERR(a_o))
3068
+ return PTR_ERR(a_o);
21113069 act = tcf_action_init_1(net, tp, tb[exts->police],
21123070 rate_tlv, "police", ovr,
2113
- TCA_ACT_BIND, true, extack);
3071
+ TCA_ACT_BIND, a_o, init_res,
3072
+ rtnl_held, extack);
3073
+ module_put(a_o->owner);
21143074 if (IS_ERR(act))
21153075 return PTR_ERR(act);
21163076
21173077 act->type = exts->type = TCA_OLD_COMPAT;
21183078 exts->actions[0] = act;
21193079 exts->nr_actions = 1;
3080
+ tcf_idr_insert_many(exts->actions);
21203081 } else if (exts->action && tb[exts->action]) {
21213082 int err;
21223083
21233084 err = tcf_action_init(net, tp, tb[exts->action],
21243085 rate_tlv, NULL, ovr, TCA_ACT_BIND,
2125
- exts->actions, &attr_size, true,
2126
- extack);
3086
+ exts->actions, init_res,
3087
+ &attr_size, rtnl_held, extack);
21273088 if (err < 0)
21283089 return err;
21293090 exts->nr_actions = err;
21303091 }
2131
- exts->net = net;
21323092 }
21333093 #else
21343094 if ((exts->action && tb[exts->action]) ||
....@@ -2175,16 +3135,17 @@
21753135 * tc data even if iproute2 was newer - jhs
21763136 */
21773137 if (exts->type != TCA_OLD_COMPAT) {
2178
- nest = nla_nest_start(skb, exts->action);
3138
+ nest = nla_nest_start_noflag(skb, exts->action);
21793139 if (nest == NULL)
21803140 goto nla_put_failure;
21813141
2182
- if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
3142
+ if (tcf_action_dump(skb, exts->actions, 0, 0, false)
3143
+ < 0)
21833144 goto nla_put_failure;
21843145 nla_nest_end(skb, nest);
21853146 } else if (exts->police) {
21863147 struct tc_action *act = tcf_exts_first_act(exts);
2187
- nest = nla_nest_start(skb, exts->police);
3148
+ nest = nla_nest_start_noflag(skb, exts->police);
21883149 if (nest == NULL || !act)
21893150 goto nla_put_failure;
21903151 if (tcf_action_dump_old(skb, act, 0, 0) < 0)
....@@ -2203,6 +3164,31 @@
22033164 }
22043165 EXPORT_SYMBOL(tcf_exts_dump);
22053166
3167
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
3168
+{
3169
+#ifdef CONFIG_NET_CLS_ACT
3170
+ struct nlattr *nest;
3171
+
3172
+ if (!exts->action || !tcf_exts_has_actions(exts))
3173
+ return 0;
3174
+
3175
+ nest = nla_nest_start_noflag(skb, exts->action);
3176
+ if (!nest)
3177
+ goto nla_put_failure;
3178
+
3179
+ if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
3180
+ goto nla_put_failure;
3181
+ nla_nest_end(skb, nest);
3182
+ return 0;
3183
+
3184
+nla_put_failure:
3185
+ nla_nest_cancel(skb, nest);
3186
+ return -1;
3187
+#else
3188
+ return 0;
3189
+#endif
3190
+}
3191
+EXPORT_SYMBOL(tcf_exts_terse_dump);
22063192
22073193 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
22083194 {
....@@ -2215,62 +3201,687 @@
22153201 }
22163202 EXPORT_SYMBOL(tcf_exts_dump_stats);
22173203
2218
-static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
2219
- enum tc_setup_type type,
2220
- void *type_data, bool err_stop)
3204
+static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
22213205 {
2222
- int ok_count = 0;
2223
-#ifdef CONFIG_NET_CLS_ACT
2224
- const struct tc_action *a;
2225
- struct net_device *dev;
2226
- int i, ret;
3206
+ if (*flags & TCA_CLS_FLAGS_IN_HW)
3207
+ return;
3208
+ *flags |= TCA_CLS_FLAGS_IN_HW;
3209
+ atomic_inc(&block->offloadcnt);
3210
+}
22273211
2228
- if (!tcf_exts_has_actions(exts))
2229
- return 0;
3212
+static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3213
+{
3214
+ if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3215
+ return;
3216
+ *flags &= ~TCA_CLS_FLAGS_IN_HW;
3217
+ atomic_dec(&block->offloadcnt);
3218
+}
22303219
2231
- for (i = 0; i < exts->nr_actions; i++) {
2232
- a = exts->actions[i];
2233
- if (!a->ops->get_dev)
2234
- continue;
2235
- dev = a->ops->get_dev(a);
2236
- if (!dev)
2237
- continue;
2238
- ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
2239
- a->ops->put_dev(dev);
2240
- if (ret < 0)
2241
- return ret;
2242
- ok_count += ret;
3220
+static void tc_cls_offload_cnt_update(struct tcf_block *block,
3221
+ struct tcf_proto *tp, u32 *cnt,
3222
+ u32 *flags, u32 diff, bool add)
3223
+{
3224
+ lockdep_assert_held(&block->cb_lock);
3225
+
3226
+ spin_lock(&tp->lock);
3227
+ if (add) {
3228
+ if (!*cnt)
3229
+ tcf_block_offload_inc(block, flags);
3230
+ *cnt += diff;
3231
+ } else {
3232
+ *cnt -= diff;
3233
+ if (!*cnt)
3234
+ tcf_block_offload_dec(block, flags);
22433235 }
2244
-#endif
3236
+ spin_unlock(&tp->lock);
3237
+}
3238
+
3239
+static void
3240
+tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3241
+ u32 *cnt, u32 *flags)
3242
+{
3243
+ lockdep_assert_held(&block->cb_lock);
3244
+
3245
+ spin_lock(&tp->lock);
3246
+ tcf_block_offload_dec(block, flags);
3247
+ *cnt = 0;
3248
+ spin_unlock(&tp->lock);
3249
+}
3250
+
3251
+static int
3252
+__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3253
+ void *type_data, bool err_stop)
3254
+{
3255
+ struct flow_block_cb *block_cb;
3256
+ int ok_count = 0;
3257
+ int err;
3258
+
3259
+ list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3260
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
3261
+ if (err) {
3262
+ if (err_stop)
3263
+ return err;
3264
+ } else {
3265
+ ok_count++;
3266
+ }
3267
+ }
22453268 return ok_count;
22463269 }
22473270
2248
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
2249
- enum tc_setup_type type, void *type_data, bool err_stop)
3271
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3272
+ void *type_data, bool err_stop, bool rtnl_held)
22503273 {
3274
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
22513275 int ok_count;
2252
- int ret;
22533276
2254
- ret = tcf_block_cb_call(block, type, type_data, err_stop);
2255
- if (ret < 0)
2256
- return ret;
2257
- ok_count = ret;
3277
+retry:
3278
+ if (take_rtnl)
3279
+ rtnl_lock();
3280
+ down_read(&block->cb_lock);
3281
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3282
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3283
+ * obtain the locks in same order here.
3284
+ */
3285
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3286
+ up_read(&block->cb_lock);
3287
+ take_rtnl = true;
3288
+ goto retry;
3289
+ }
22583290
2259
- if (!exts || ok_count)
2260
- return ok_count;
2261
- ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
2262
- if (ret < 0)
2263
- return ret;
2264
- ok_count += ret;
3291
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
22653292
3293
+ up_read(&block->cb_lock);
3294
+ if (take_rtnl)
3295
+ rtnl_unlock();
22663296 return ok_count;
22673297 }
22683298 EXPORT_SYMBOL(tc_setup_cb_call);
3299
+
3300
+/* Non-destructive filter add. If filter that wasn't already in hardware is
3301
+ * successfully offloaded, increment block offloads counter. On failure,
3302
+ * previously offloaded filter is considered to be intact and offloads counter
3303
+ * is not decremented.
3304
+ */
3305
+
3306
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3307
+ enum tc_setup_type type, void *type_data, bool err_stop,
3308
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3309
+{
3310
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3311
+ int ok_count;
3312
+
3313
+retry:
3314
+ if (take_rtnl)
3315
+ rtnl_lock();
3316
+ down_read(&block->cb_lock);
3317
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3318
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3319
+ * obtain the locks in same order here.
3320
+ */
3321
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3322
+ up_read(&block->cb_lock);
3323
+ take_rtnl = true;
3324
+ goto retry;
3325
+ }
3326
+
3327
+ /* Make sure all netdevs sharing this block are offload-capable. */
3328
+ if (block->nooffloaddevcnt && err_stop) {
3329
+ ok_count = -EOPNOTSUPP;
3330
+ goto err_unlock;
3331
+ }
3332
+
3333
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3334
+ if (ok_count < 0)
3335
+ goto err_unlock;
3336
+
3337
+ if (tp->ops->hw_add)
3338
+ tp->ops->hw_add(tp, type_data);
3339
+ if (ok_count > 0)
3340
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3341
+ ok_count, true);
3342
+err_unlock:
3343
+ up_read(&block->cb_lock);
3344
+ if (take_rtnl)
3345
+ rtnl_unlock();
3346
+ return ok_count < 0 ? ok_count : 0;
3347
+}
3348
+EXPORT_SYMBOL(tc_setup_cb_add);
3349
+
3350
+/* Destructive filter replace. If filter that wasn't already in hardware is
3351
+ * successfully offloaded, increment block offload counter. On failure,
3352
+ * previously offloaded filter is considered to be destroyed and offload counter
3353
+ * is decremented.
3354
+ */
3355
+
3356
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3357
+ enum tc_setup_type type, void *type_data, bool err_stop,
3358
+ u32 *old_flags, unsigned int *old_in_hw_count,
3359
+ u32 *new_flags, unsigned int *new_in_hw_count,
3360
+ bool rtnl_held)
3361
+{
3362
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3363
+ int ok_count;
3364
+
3365
+retry:
3366
+ if (take_rtnl)
3367
+ rtnl_lock();
3368
+ down_read(&block->cb_lock);
3369
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3370
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3371
+ * obtain the locks in same order here.
3372
+ */
3373
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3374
+ up_read(&block->cb_lock);
3375
+ take_rtnl = true;
3376
+ goto retry;
3377
+ }
3378
+
3379
+ /* Make sure all netdevs sharing this block are offload-capable. */
3380
+ if (block->nooffloaddevcnt && err_stop) {
3381
+ ok_count = -EOPNOTSUPP;
3382
+ goto err_unlock;
3383
+ }
3384
+
3385
+ tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3386
+ if (tp->ops->hw_del)
3387
+ tp->ops->hw_del(tp, type_data);
3388
+
3389
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3390
+ if (ok_count < 0)
3391
+ goto err_unlock;
3392
+
3393
+ if (tp->ops->hw_add)
3394
+ tp->ops->hw_add(tp, type_data);
3395
+ if (ok_count > 0)
3396
+ tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3397
+ new_flags, ok_count, true);
3398
+err_unlock:
3399
+ up_read(&block->cb_lock);
3400
+ if (take_rtnl)
3401
+ rtnl_unlock();
3402
+ return ok_count < 0 ? ok_count : 0;
3403
+}
3404
+EXPORT_SYMBOL(tc_setup_cb_replace);
3405
+
3406
+/* Destroy filter and decrement block offload counter, if filter was previously
3407
+ * offloaded.
3408
+ */
3409
+
3410
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3411
+ enum tc_setup_type type, void *type_data, bool err_stop,
3412
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3413
+{
3414
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3415
+ int ok_count;
3416
+
3417
+retry:
3418
+ if (take_rtnl)
3419
+ rtnl_lock();
3420
+ down_read(&block->cb_lock);
3421
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
3422
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
3423
+ * obtain the locks in same order here.
3424
+ */
3425
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3426
+ up_read(&block->cb_lock);
3427
+ take_rtnl = true;
3428
+ goto retry;
3429
+ }
3430
+
3431
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3432
+
3433
+ tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3434
+ if (tp->ops->hw_del)
3435
+ tp->ops->hw_del(tp, type_data);
3436
+
3437
+ up_read(&block->cb_lock);
3438
+ if (take_rtnl)
3439
+ rtnl_unlock();
3440
+ return ok_count < 0 ? ok_count : 0;
3441
+}
3442
+EXPORT_SYMBOL(tc_setup_cb_destroy);
3443
+
3444
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3445
+ bool add, flow_setup_cb_t *cb,
3446
+ enum tc_setup_type type, void *type_data,
3447
+ void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3448
+{
3449
+ int err = cb(type, type_data, cb_priv);
3450
+
3451
+ if (err) {
3452
+ if (add && tc_skip_sw(*flags))
3453
+ return err;
3454
+ } else {
3455
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3456
+ add);
3457
+ }
3458
+
3459
+ return 0;
3460
+}
3461
+EXPORT_SYMBOL(tc_setup_cb_reoffload);
3462
+
3463
+static int tcf_act_get_cookie(struct flow_action_entry *entry,
3464
+ const struct tc_action *act)
3465
+{
3466
+ struct tc_cookie *cookie;
3467
+ int err = 0;
3468
+
3469
+ rcu_read_lock();
3470
+ cookie = rcu_dereference(act->act_cookie);
3471
+ if (cookie) {
3472
+ entry->cookie = flow_action_cookie_create(cookie->data,
3473
+ cookie->len,
3474
+ GFP_ATOMIC);
3475
+ if (!entry->cookie)
3476
+ err = -ENOMEM;
3477
+ }
3478
+ rcu_read_unlock();
3479
+ return err;
3480
+}
3481
+
3482
+static void tcf_act_put_cookie(struct flow_action_entry *entry)
3483
+{
3484
+ flow_action_cookie_destroy(entry->cookie);
3485
+}
3486
+
3487
+void tc_cleanup_flow_action(struct flow_action *flow_action)
3488
+{
3489
+ struct flow_action_entry *entry;
3490
+ int i;
3491
+
3492
+ flow_action_for_each(i, entry, flow_action) {
3493
+ tcf_act_put_cookie(entry);
3494
+ if (entry->destructor)
3495
+ entry->destructor(entry->destructor_priv);
3496
+ }
3497
+}
3498
+EXPORT_SYMBOL(tc_cleanup_flow_action);
3499
+
3500
+static void tcf_mirred_get_dev(struct flow_action_entry *entry,
3501
+ const struct tc_action *act)
3502
+{
3503
+#ifdef CONFIG_NET_CLS_ACT
3504
+ entry->dev = act->ops->get_dev(act, &entry->destructor);
3505
+ if (!entry->dev)
3506
+ return;
3507
+ entry->destructor_priv = entry->dev;
3508
+#endif
3509
+}
3510
+
3511
+static void tcf_tunnel_encap_put_tunnel(void *priv)
3512
+{
3513
+ struct ip_tunnel_info *tunnel = priv;
3514
+
3515
+ kfree(tunnel);
3516
+}
3517
+
3518
+static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
3519
+ const struct tc_action *act)
3520
+{
3521
+ entry->tunnel = tcf_tunnel_info_copy(act);
3522
+ if (!entry->tunnel)
3523
+ return -ENOMEM;
3524
+ entry->destructor = tcf_tunnel_encap_put_tunnel;
3525
+ entry->destructor_priv = entry->tunnel;
3526
+ return 0;
3527
+}
3528
+
3529
+static void tcf_sample_get_group(struct flow_action_entry *entry,
3530
+ const struct tc_action *act)
3531
+{
3532
+#ifdef CONFIG_NET_CLS_ACT
3533
+ entry->sample.psample_group =
3534
+ act->ops->get_psample_group(act, &entry->destructor);
3535
+ entry->destructor_priv = entry->sample.psample_group;
3536
+#endif
3537
+}
3538
+
3539
+static void tcf_gate_entry_destructor(void *priv)
3540
+{
3541
+ struct action_gate_entry *oe = priv;
3542
+
3543
+ kfree(oe);
3544
+}
3545
+
3546
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
3547
+ const struct tc_action *act)
3548
+{
3549
+ entry->gate.entries = tcf_gate_get_list(act);
3550
+
3551
+ if (!entry->gate.entries)
3552
+ return -EINVAL;
3553
+
3554
+ entry->destructor = tcf_gate_entry_destructor;
3555
+ entry->destructor_priv = entry->gate.entries;
3556
+
3557
+ return 0;
3558
+}
3559
+
3560
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
3561
+{
3562
+ if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
3563
+ return FLOW_ACTION_HW_STATS_DONT_CARE;
3564
+ else if (!hw_stats)
3565
+ return FLOW_ACTION_HW_STATS_DISABLED;
3566
+
3567
+ return hw_stats;
3568
+}
3569
+
3570
+int tc_setup_flow_action(struct flow_action *flow_action,
3571
+ const struct tcf_exts *exts)
3572
+{
3573
+ struct tc_action *act;
3574
+ int i, j, k, err = 0;
3575
+
3576
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
3577
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
3578
+ BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
3579
+
3580
+ if (!exts)
3581
+ return 0;
3582
+
3583
+ j = 0;
3584
+ tcf_exts_for_each_action(i, act, exts) {
3585
+ struct flow_action_entry *entry;
3586
+
3587
+ entry = &flow_action->entries[j];
3588
+ spin_lock_bh(&act->tcfa_lock);
3589
+ err = tcf_act_get_cookie(entry, act);
3590
+ if (err)
3591
+ goto err_out_locked;
3592
+
3593
+ entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3594
+
3595
+ if (is_tcf_gact_ok(act)) {
3596
+ entry->id = FLOW_ACTION_ACCEPT;
3597
+ } else if (is_tcf_gact_shot(act)) {
3598
+ entry->id = FLOW_ACTION_DROP;
3599
+ } else if (is_tcf_gact_trap(act)) {
3600
+ entry->id = FLOW_ACTION_TRAP;
3601
+ } else if (is_tcf_gact_goto_chain(act)) {
3602
+ entry->id = FLOW_ACTION_GOTO;
3603
+ entry->chain_index = tcf_gact_goto_chain_index(act);
3604
+ } else if (is_tcf_mirred_egress_redirect(act)) {
3605
+ entry->id = FLOW_ACTION_REDIRECT;
3606
+ tcf_mirred_get_dev(entry, act);
3607
+ } else if (is_tcf_mirred_egress_mirror(act)) {
3608
+ entry->id = FLOW_ACTION_MIRRED;
3609
+ tcf_mirred_get_dev(entry, act);
3610
+ } else if (is_tcf_mirred_ingress_redirect(act)) {
3611
+ entry->id = FLOW_ACTION_REDIRECT_INGRESS;
3612
+ tcf_mirred_get_dev(entry, act);
3613
+ } else if (is_tcf_mirred_ingress_mirror(act)) {
3614
+ entry->id = FLOW_ACTION_MIRRED_INGRESS;
3615
+ tcf_mirred_get_dev(entry, act);
3616
+ } else if (is_tcf_vlan(act)) {
3617
+ switch (tcf_vlan_action(act)) {
3618
+ case TCA_VLAN_ACT_PUSH:
3619
+ entry->id = FLOW_ACTION_VLAN_PUSH;
3620
+ entry->vlan.vid = tcf_vlan_push_vid(act);
3621
+ entry->vlan.proto = tcf_vlan_push_proto(act);
3622
+ entry->vlan.prio = tcf_vlan_push_prio(act);
3623
+ break;
3624
+ case TCA_VLAN_ACT_POP:
3625
+ entry->id = FLOW_ACTION_VLAN_POP;
3626
+ break;
3627
+ case TCA_VLAN_ACT_MODIFY:
3628
+ entry->id = FLOW_ACTION_VLAN_MANGLE;
3629
+ entry->vlan.vid = tcf_vlan_push_vid(act);
3630
+ entry->vlan.proto = tcf_vlan_push_proto(act);
3631
+ entry->vlan.prio = tcf_vlan_push_prio(act);
3632
+ break;
3633
+ default:
3634
+ err = -EOPNOTSUPP;
3635
+				goto err_out_locked;
+			}
+		} else if (is_tcf_tunnel_set(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+			err = tcf_tunnel_encap_get_tunnel(entry, act);
+			if (err)
+				goto err_out_locked;
+		} else if (is_tcf_tunnel_release(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_DECAP;
+		} else if (is_tcf_pedit(act)) {
+			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+				switch (tcf_pedit_cmd(act, k)) {
+				case TCA_PEDIT_KEY_EX_CMD_SET:
+					entry->id = FLOW_ACTION_MANGLE;
+					break;
+				case TCA_PEDIT_KEY_EX_CMD_ADD:
+					entry->id = FLOW_ACTION_ADD;
+					break;
+				default:
+					err = -EOPNOTSUPP;
+					goto err_out_locked;
+				}
+				entry->mangle.htype = tcf_pedit_htype(act, k);
+				entry->mangle.mask = tcf_pedit_mask(act, k);
+				entry->mangle.val = tcf_pedit_val(act, k);
+				entry->mangle.offset = tcf_pedit_offset(act, k);
+				entry->hw_stats = tc_act_hw_stats(act->hw_stats);
+				entry = &flow_action->entries[++j];
+			}
+		} else if (is_tcf_csum(act)) {
+			entry->id = FLOW_ACTION_CSUM;
+			entry->csum_flags = tcf_csum_update_flags(act);
+		} else if (is_tcf_skbedit_mark(act)) {
+			entry->id = FLOW_ACTION_MARK;
+			entry->mark = tcf_skbedit_mark(act);
+		} else if (is_tcf_sample(act)) {
+			entry->id = FLOW_ACTION_SAMPLE;
+			entry->sample.trunc_size = tcf_sample_trunc_size(act);
+			entry->sample.truncate = tcf_sample_truncate(act);
+			entry->sample.rate = tcf_sample_rate(act);
+			tcf_sample_get_group(entry, act);
+		} else if (is_tcf_police(act)) {
+			entry->id = FLOW_ACTION_POLICE;
+			entry->police.burst = tcf_police_burst(act);
+			entry->police.rate_bytes_ps =
+				tcf_police_rate_bytes_ps(act);
+			entry->police.mtu = tcf_police_tcfp_mtu(act);
+			entry->police.index = act->tcfa_index;
+		} else if (is_tcf_ct(act)) {
+			entry->id = FLOW_ACTION_CT;
+			entry->ct.action = tcf_ct_action(act);
+			entry->ct.zone = tcf_ct_zone(act);
+			entry->ct.flow_table = tcf_ct_ft(act);
+		} else if (is_tcf_mpls(act)) {
+			switch (tcf_mpls_action(act)) {
+			case TCA_MPLS_ACT_PUSH:
+				entry->id = FLOW_ACTION_MPLS_PUSH;
+				entry->mpls_push.proto = tcf_mpls_proto(act);
+				entry->mpls_push.label = tcf_mpls_label(act);
+				entry->mpls_push.tc = tcf_mpls_tc(act);
+				entry->mpls_push.bos = tcf_mpls_bos(act);
+				entry->mpls_push.ttl = tcf_mpls_ttl(act);
+				break;
+			case TCA_MPLS_ACT_POP:
+				entry->id = FLOW_ACTION_MPLS_POP;
+				entry->mpls_pop.proto = tcf_mpls_proto(act);
+				break;
+			case TCA_MPLS_ACT_MODIFY:
+				entry->id = FLOW_ACTION_MPLS_MANGLE;
+				entry->mpls_mangle.label = tcf_mpls_label(act);
+				entry->mpls_mangle.tc = tcf_mpls_tc(act);
+				entry->mpls_mangle.bos = tcf_mpls_bos(act);
+				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+				break;
+			default:
+				err = -EOPNOTSUPP;
+				goto err_out_locked;
+			}
+		} else if (is_tcf_skbedit_ptype(act)) {
+			entry->id = FLOW_ACTION_PTYPE;
+			entry->ptype = tcf_skbedit_ptype(act);
+		} else if (is_tcf_skbedit_priority(act)) {
+			entry->id = FLOW_ACTION_PRIORITY;
+			entry->priority = tcf_skbedit_priority(act);
+		} else if (is_tcf_gate(act)) {
+			entry->id = FLOW_ACTION_GATE;
+			entry->gate.index = tcf_gate_index(act);
+			entry->gate.prio = tcf_gate_prio(act);
+			entry->gate.basetime = tcf_gate_basetime(act);
+			entry->gate.cycletime = tcf_gate_cycletime(act);
+			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+			entry->gate.num_entries = tcf_gate_num_entries(act);
+			err = tcf_gate_get_entries(entry, act);
+			if (err)
+				goto err_out_locked;
+		} else {
+			err = -EOPNOTSUPP;
+			goto err_out_locked;
+		}
+		spin_unlock_bh(&act->tcfa_lock);
+
+		if (!is_tcf_pedit(act))
+			j++;
+	}
+
+err_out:
+	if (err)
+		tc_cleanup_flow_action(flow_action);
+
+	return err;
+err_out_locked:
+	spin_unlock_bh(&act->tcfa_lock);
+	goto err_out;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
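Editor's sketch, not part of the kernel source: the else-if ladder above translates each software tc_action into one flow_action entry for drivers to offload, with pedit as the one action that fans out into several entries (one per key, via entries[++j]). Below is a minimal picture of the consumer side; foo_hw_program_encap() and foo_hw_program_rewrite() are hypothetical backend hooks, while flow_action_for_each() is the real iterator from <net/flow_offload.h>.

extern int foo_hw_program_encap(const struct ip_tunnel_info *tunnel);
extern int foo_hw_program_rewrite(u32 htype, u32 offset, u32 mask, u32 val);

static int foo_parse_flow_action(const struct flow_action *flow_action)
{
	const struct flow_action_entry *entry;
	int i;

	flow_action_for_each(i, entry, flow_action) {
		switch (entry->id) {
		case FLOW_ACTION_TUNNEL_ENCAP:
			/* filled in by tcf_tunnel_encap_get_tunnel() above */
			foo_hw_program_encap(entry->tunnel);
			break;
		case FLOW_ACTION_MANGLE:
		case FLOW_ACTION_ADD:
			/* one entry was emitted per pedit key above */
			foo_hw_program_rewrite(entry->mangle.htype,
					       entry->mangle.offset,
					       entry->mangle.mask,
					       entry->mangle.val);
			break;
		default:
			return -EOPNOTSUPP;
		}
	}
	return 0;
}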
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+	unsigned int num_acts = 0;
+	struct tc_action *act;
+	int i;
+
+	tcf_exts_for_each_action(i, act, exts) {
+		if (is_tcf_pedit(act))
+			num_acts += tcf_pedit_nkeys(act);
+		else
+			num_acts++;
+	}
+	return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
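Editor's sketch of the intended pairing: callers use tcf_exts_num_actions() to size the flow_action table before tc_setup_flow_action() fills it, which is why a pedit action with N keys is counted as N prospective entries, matching the entries[++j] fan-out in the pedit branch above. The wrapper name foo_exts_to_flow_rule() is a placeholder; flow_rule_alloc() is the real allocator from <net/flow_offload.h>.

static struct flow_rule *foo_exts_to_flow_rule(struct tcf_exts *exts)
{
	struct flow_rule *rule;
	int err;

	/* size entries[] to match what tc_setup_flow_action() will emit */
	rule = flow_rule_alloc(tcf_exts_num_actions(exts));
	if (!rule)
		return ERR_PTR(-ENOMEM);

	err = tc_setup_flow_action(&rule->action, exts);
	if (err) {
		kfree(rule);
		return ERR_PTR(err);
	}
	return rule;
}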
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
+					u32 *p_block_index,
+					struct netlink_ext_ack *extack)
+{
+	*p_block_index = nla_get_u32(block_index_attr);
+	if (!*p_block_index) {
+		NL_SET_ERR_MSG(extack, "Block number may not be zero");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+		    enum flow_block_binder_type binder_type,
+		    struct nlattr *block_index_attr,
+		    struct netlink_ext_ack *extack)
+{
+	u32 block_index;
+	int err;
+
+	if (!block_index_attr)
+		return 0;
+
+	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+	if (err)
+		return err;
+
+	if (!block_index)
+		return 0;
+
+	qe->info.binder_type = binder_type;
+	qe->info.chain_head_change = tcf_chain_head_change_dflt;
+	qe->info.chain_head_change_priv = &qe->filter_chain;
+	qe->info.block_index = block_index;
+
+	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
+}
+EXPORT_SYMBOL(tcf_qevent_init);
+
+void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
+{
+	if (qe->info.block_index)
+		tcf_block_put_ext(qe->block, sch, &qe->info);
+}
+EXPORT_SYMBOL(tcf_qevent_destroy);
+
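Editor's sketch of the caller side, loosely modelled on how a qdisc such as sch_red binds its qevent blocks. The foo_* names and the TCA_FOO_QEVENT_BLOCK attribute are placeholders; FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP is a real binder type from enum flow_block_binder_type.

struct foo_sched_data {
	struct tcf_qevent qe;
	/* ... other qdisc state ... */
};

static int foo_init(struct Qdisc *sch, struct nlattr **tb,
		    struct netlink_ext_ack *extack)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* binds the shared block named by the attribute, if one was given */
	return tcf_qevent_init(&q->qe, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			       tb[TCA_FOO_QEVENT_BLOCK], extack);
}

static void foo_destroy(struct Qdisc *sch)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* releases the block reference taken by tcf_qevent_init() */
	tcf_qevent_destroy(&q->qe, sch);
}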
+int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+			       struct netlink_ext_ack *extack)
+{
+	u32 block_index;
+	int err;
+
+	if (!block_index_attr)
+		return 0;
+
+	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+	if (err)
+		return err;
+
+	/* Bounce newly-configured block or change in block. */
+	if (block_index != qe->info.block_index) {
+		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tcf_qevent_validate_change);
+
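Continuing the hypothetical foo qdisc from the sketch above: a qevent block can be bound only at init time, so the change() callback merely re-validates the attribute before applying anything else.

static int foo_change(struct Qdisc *sch, struct nlattr **tb,
		      struct netlink_ext_ack *extack)
{
	struct foo_sched_data *q = qdisc_priv(sch);
	int err;

	/* rejects any attempt to rebind to a different block index */
	err = tcf_qevent_validate_change(&q->qe, tb[TCA_FOO_QEVENT_BLOCK],
					 extack);
	if (err)
		return err;

	/* ... apply the rest of the configuration ... */
	return 0;
}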
+struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+				  struct sk_buff **to_free, int *ret)
+{
+	struct tcf_result cl_res;
+	struct tcf_proto *fl;
+
+	if (!qe->info.block_index)
+		return skb;
+
+	fl = rcu_dereference_bh(qe->filter_chain);
+
+	switch (tcf_classify(skb, fl, &cl_res, false)) {
+	case TC_ACT_SHOT:
+		qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		*ret = __NET_XMIT_BYPASS;
+		return NULL;
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
+		__qdisc_drop(skb, to_free);
+		*ret = __NET_XMIT_STOLEN;
+		return NULL;
+	case TC_ACT_REDIRECT:
+		skb_do_redirect(skb);
+		*ret = __NET_XMIT_STOLEN;
+		return NULL;
+	}
+
+	return skb;
+}
+EXPORT_SYMBOL(tcf_qevent_handle);
+
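On the datapath the qdisc passes the skb through the qevent at the relevant hook and must treat a NULL return as "the filters consumed the packet", propagating the verdict left in ret. A sketch continuing the hypothetical foo qdisc; foo_should_early_drop() is an invented predicate, and the NET_XMIT_CN | ret pattern mirrors how sch_red's early-drop qevent is wired up.

static int foo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct foo_sched_data *q = qdisc_priv(sch);
	int ret;

	if (foo_should_early_drop(sch, skb)) {	/* hypothetical predicate */
		skb = tcf_qevent_handle(&q->qe, sch, skb, to_free, &ret);
		if (!skb)
			return NET_XMIT_CN | ret;
		return qdisc_drop(skb, sch, to_free);
	}
	/* ... normal enqueue path ... */
	return qdisc_enqueue_tail(skb, sch);
}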
+int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
+{
+	if (!qe->info.block_index)
+		return 0;
+	return nla_put_u32(skb, attr_name, qe->info.block_index);
+}
+EXPORT_SYMBOL(tcf_qevent_dump);
+#endif
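Finally, the dump() callback echoes the bound block index back to userspace; a simplified sketch (real qdiscs dump this inside their options nest), again with TCA_FOO_QEVENT_BLOCK as a placeholder attribute:

static int foo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	/* emits nothing when no block was bound (block_index == 0) */
	if (tcf_qevent_dump(skb, TCA_FOO_QEVENT_BLOCK, &q->qe))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	return -1;
}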
 
 static __net_init int tcf_net_init(struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
 
+	spin_lock_init(&tn->idr_lock);
 	idr_init(&tn->idr);
 	return 0;
 }
@@ -2301,10 +3912,12 @@
 	if (err)
 		goto err_register_pernet_subsys;
 
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
-		      tc_dump_tfilter, 0);
+		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
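Editor's note on the registration change above: RTNL_FLAG_DOIT_UNLOCKED tells the rtnetlink core not to take the RTNL mutex before invoking these three filter handlers, so in this tree tc_new_tfilter(), tc_del_tfilter() and tc_get_tfilter() run unlocked and take rtnl_lock() themselves only when a classifier or the underlying device requires it; this is what the rtnl_held argument threaded through the tcf_proto lookup and ops in this file supports. The chain handlers are still registered with flags 0 and therefore continue to run under RTNL.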