hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/net/sched/sch_api.c
....@@ -1,10 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * net/sched/sch_api.c Packet scheduler API.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU General Public License
6
- * as published by the Free Software Foundation; either version
7
- * 2 of the License, or (at your option) any later version.
84 *
95 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
106 *
....@@ -27,7 +23,6 @@
2723 #include <linux/kmod.h>
2824 #include <linux/list.h>
2925 #include <linux/hrtimer.h>
30
-#include <linux/lockdep.h>
3126 #include <linux/slab.h>
3227 #include <linux/hashtable.h>
3328
....@@ -36,6 +31,8 @@
3631 #include <net/netlink.h>
3732 #include <net/pkt_sched.h>
3833 #include <net/pkt_cls.h>
34
+
35
+#include <trace/events/qdisc.h>
3936
4037 /*
4138
....@@ -270,7 +267,8 @@
270267 root->handle == handle)
271268 return root;
272269
273
- hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
270
+ hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
271
+ lockdep_rtnl_is_held()) {
274272 if (q->handle == handle)
275273 return q;
276274 }
....@@ -303,7 +301,7 @@
303301
304302 if (!handle)
305303 return NULL;
306
- q = qdisc_match_from_root(dev->qdisc, handle);
304
+ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
307305 if (q)
308306 goto out;
309307
....@@ -322,7 +320,7 @@
322320
323321 if (!handle)
324322 return NULL;
325
- q = qdisc_match_from_root(dev->qdisc, handle);
323
+ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
326324 if (q)
327325 goto out;
328326
....@@ -336,7 +334,6 @@
336334 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
337335 {
338336 unsigned long cl;
339
- struct Qdisc *leaf;
340337 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
341338
342339 if (cops == NULL)
....@@ -345,8 +342,7 @@
345342
346343 if (cl == 0)
347344 return NULL;
348
- leaf = cops->leaf(p, cl);
349
- return leaf;
345
+ return cops->leaf(p, cl);
350346 }
351347
352348 /* Find queueing discipline by name */
....@@ -483,7 +479,8 @@
483479 u16 *tab = NULL;
484480 int err;
485481
486
- err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
482
+ err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
483
+ extack);
487484 if (err < 0)
488485 return ERR_PTR(err);
489486 if (!tb[TCA_STAB_BASE]) {
....@@ -536,11 +533,6 @@
536533 return stab;
537534 }
538535
539
-static void stab_kfree_rcu(struct rcu_head *head)
540
-{
541
- kfree(container_of(head, struct qdisc_size_table, rcu));
542
-}
543
-
544536 void qdisc_put_stab(struct qdisc_size_table *tab)
545537 {
546538 if (!tab)
....@@ -548,7 +540,7 @@
548540
549541 if (--tab->refcnt == 0) {
550542 list_del(&tab->list);
551
- call_rcu_bh(&tab->rcu, stab_kfree_rcu);
543
+ kfree_rcu(tab, rcu);
552544 }
553545 }
554546 EXPORT_SYMBOL(qdisc_put_stab);
....@@ -557,7 +549,7 @@
557549 {
558550 struct nlattr *nest;
559551
560
- nest = nla_nest_start(skb, TCA_STAB);
552
+ nest = nla_nest_start_noflag(skb, TCA_STAB);
561553 if (nest == NULL)
562554 goto nla_put_failure;
563555 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
....@@ -636,21 +628,28 @@
636628 }
637629 EXPORT_SYMBOL(qdisc_watchdog_init);
638630
639
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
631
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
632
+ u64 delta_ns)
640633 {
641634 if (test_bit(__QDISC_STATE_DEACTIVATED,
642635 &qdisc_root_sleeping(wd->qdisc)->state))
643636 return;
644637
645
- if (wd->last_expires == expires)
646
- return;
638
+ if (hrtimer_is_queued(&wd->timer)) {
639
+ /* If timer is already set in [expires, expires + delta_ns],
640
+ * do not reprogram it.
641
+ */
642
+ if (wd->last_expires - expires <= delta_ns)
643
+ return;
644
+ }
647645
648646 wd->last_expires = expires;
649
- hrtimer_start(&wd->timer,
650
- ns_to_ktime(expires),
651
- HRTIMER_MODE_ABS_PINNED);
647
+ hrtimer_start_range_ns(&wd->timer,
648
+ ns_to_ktime(expires),
649
+ delta_ns,
650
+ HRTIMER_MODE_ABS_PINNED);
652651 }
653
-EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
652
+EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
654653
655654 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
656655 {
....@@ -768,8 +767,7 @@
768767 return 0;
769768 }
770769
771
-void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
772
- unsigned int len)
770
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
773771 {
774772 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
775773 const struct Qdisc_class_ops *cops;
....@@ -817,6 +815,71 @@
817815 rcu_read_unlock();
818816 }
819817 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
818
+
819
+int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
820
+ void *type_data)
821
+{
822
+ struct net_device *dev = qdisc_dev(sch);
823
+ int err;
824
+
825
+ sch->flags &= ~TCQ_F_OFFLOADED;
826
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
827
+ return 0;
828
+
829
+ err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
830
+ if (err == -EOPNOTSUPP)
831
+ return 0;
832
+
833
+ if (!err)
834
+ sch->flags |= TCQ_F_OFFLOADED;
835
+
836
+ return err;
837
+}
838
+EXPORT_SYMBOL(qdisc_offload_dump_helper);
839
+
840
+void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
841
+ struct Qdisc *new, struct Qdisc *old,
842
+ enum tc_setup_type type, void *type_data,
843
+ struct netlink_ext_ack *extack)
844
+{
845
+ bool any_qdisc_is_offloaded;
846
+ int err;
847
+
848
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
849
+ return;
850
+
851
+ err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
852
+
853
+ /* Don't report error if the graft is part of destroy operation. */
854
+ if (!err || !new || new == &noop_qdisc)
855
+ return;
856
+
857
+ /* Don't report error if the parent, the old child and the new
858
+ * one are not offloaded.
859
+ */
860
+ any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
861
+ any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
862
+ any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
863
+
864
+ if (any_qdisc_is_offloaded)
865
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
866
+}
867
+EXPORT_SYMBOL(qdisc_offload_graft_helper);
868
+
869
+static void qdisc_offload_graft_root(struct net_device *dev,
870
+ struct Qdisc *new, struct Qdisc *old,
871
+ struct netlink_ext_ack *extack)
872
+{
873
+ struct tc_root_qopt_offload graft_offload = {
874
+ .command = TC_ROOT_GRAFT,
875
+ .handle = new ? new->handle : 0,
876
+ .ingress = (new && new->flags & TCQ_F_INGRESS) ||
877
+ (old && old->flags & TCQ_F_INGRESS),
878
+ };
879
+
880
+ qdisc_offload_graft_helper(dev, NULL, new, old,
881
+ TC_SETUP_ROOT_QDISC, &graft_offload, extack);
882
+}
820883
821884 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
822885 u32 portid, u32 seq, u16 flags, int event)
....@@ -949,6 +1012,19 @@
9491012 qdisc_put(old);
9501013 }
9511014
1015
+static void qdisc_clear_nolock(struct Qdisc *sch)
1016
+{
1017
+ sch->flags &= ~TCQ_F_NOLOCK;
1018
+ if (!(sch->flags & TCQ_F_CPUSTATS))
1019
+ return;
1020
+
1021
+ free_percpu(sch->cpu_bstats);
1022
+ free_percpu(sch->cpu_qstats);
1023
+ sch->cpu_bstats = NULL;
1024
+ sch->cpu_qstats = NULL;
1025
+ sch->flags &= ~TCQ_F_CPUSTATS;
1026
+}
1027
+
9521028 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
9531029 * to device "dev".
9541030 *
....@@ -965,7 +1041,6 @@
9651041 {
9661042 struct Qdisc *q = old;
9671043 struct net *net = dev_net(dev);
968
- int err = 0;
9691044
9701045 if (parent == NULL) {
9711046 unsigned int i, num_q, ingress;
....@@ -984,6 +1059,8 @@
9841059
9851060 if (dev->flags & IFF_UP)
9861061 dev_deactivate(dev);
1062
+
1063
+ qdisc_offload_graft_root(dev, new, old, extack);
9871064
9881065 if (new && new->ops->attach)
9891066 goto skip;
....@@ -1004,11 +1081,12 @@
10041081
10051082 skip:
10061083 if (!ingress) {
1007
- notify_and_destroy(net, skb, n, classid,
1008
- dev->qdisc, new);
1084
+ old = rtnl_dereference(dev->qdisc);
10091085 if (new && !new->ops->attach)
10101086 qdisc_refcount_inc(new);
1011
- dev->qdisc = new ? : &noop_qdisc;
1087
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1088
+
1089
+ notify_and_destroy(net, skb, n, classid, old, new);
10121090
10131091 if (new && new->ops->attach)
10141092 new->ops->attach(new);
....@@ -1020,28 +1098,28 @@
10201098 dev_activate(dev);
10211099 } else {
10221100 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1101
+ unsigned long cl;
1102
+ int err;
10231103
10241104 /* Only support running class lockless if parent is lockless */
1025
- if (new && (new->flags & TCQ_F_NOLOCK) &&
1026
- parent && !(parent->flags & TCQ_F_NOLOCK))
1027
- new->flags &= ~TCQ_F_NOLOCK;
1105
+ if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1106
+ qdisc_clear_nolock(new);
10281107
1029
- err = -EOPNOTSUPP;
1030
- if (cops && cops->graft) {
1031
- unsigned long cl = cops->find(parent, classid);
1108
+ if (!cops || !cops->graft)
1109
+ return -EOPNOTSUPP;
10321110
1033
- if (cl) {
1034
- err = cops->graft(parent, cl, new, &old,
1035
- extack);
1036
- } else {
1037
- NL_SET_ERR_MSG(extack, "Specified class not found");
1038
- err = -ENOENT;
1039
- }
1111
+ cl = cops->find(parent, classid);
1112
+ if (!cl) {
1113
+ NL_SET_ERR_MSG(extack, "Specified class not found");
1114
+ return -ENOENT;
10401115 }
1041
- if (!err)
1042
- notify_and_destroy(net, skb, n, classid, old, new);
1116
+
1117
+ err = cops->graft(parent, cl, new, &old, extack);
1118
+ if (err)
1119
+ return err;
1120
+ notify_and_destroy(net, skb, n, classid, old, new);
10431121 }
1044
- return err;
1122
+ return 0;
10451123 }
10461124
10471125 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
....@@ -1077,10 +1155,6 @@
10771155 }
10781156 return 0;
10791157 }
1080
-
1081
-/* lockdep annotation is needed for ingress; egress gets it only for name */
1082
-static struct lock_class_key qdisc_tx_lock;
1083
-static struct lock_class_key qdisc_rx_lock;
10841158
10851159 /*
10861160 Allocate and initialize new qdisc.
....@@ -1146,15 +1220,15 @@
11461220 if (handle == TC_H_INGRESS) {
11471221 sch->flags |= TCQ_F_INGRESS;
11481222 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1149
- lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
11501223 } else {
11511224 if (handle == 0) {
11521225 handle = qdisc_alloc_handle(dev);
1153
- err = -ENOMEM;
1154
- if (handle == 0)
1226
+ if (handle == 0) {
1227
+ NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1228
+ err = -ENOSPC;
11551229 goto err_out3;
1230
+ }
11561231 }
1157
- lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
11581232 if (!netif_is_multiqueue(dev))
11591233 sch->flags |= TCQ_F_ONETXQUEUE;
11601234 }
....@@ -1219,6 +1293,7 @@
12191293 }
12201294
12211295 qdisc_hash_add(sch, false);
1296
+ trace_qdisc_create(ops, dev, parent);
12221297
12231298 return sch;
12241299
....@@ -1333,8 +1408,7 @@
13331408 }
13341409
13351410 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1336
- [TCA_KIND] = { .type = NLA_NUL_STRING,
1337
- .len = IFNAMSIZ - 1 },
1411
+ [TCA_KIND] = { .type = NLA_STRING },
13381412 [TCA_RATE] = { .type = NLA_BINARY,
13391413 .len = sizeof(struct tc_estimator) },
13401414 [TCA_STAB] = { .type = NLA_NESTED },
....@@ -1364,8 +1438,8 @@
13641438 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
13651439 return -EPERM;
13661440
1367
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1368
- extack);
1441
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1442
+ rtm_tca_policy, extack);
13691443 if (err < 0)
13701444 return err;
13711445
....@@ -1387,7 +1461,7 @@
13871461 q = dev_ingress_queue(dev)->qdisc_sleeping;
13881462 }
13891463 } else {
1390
- q = dev->qdisc;
1464
+ q = rtnl_dereference(dev->qdisc);
13911465 }
13921466 if (!q) {
13931467 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
....@@ -1449,8 +1523,8 @@
14491523
14501524 replay:
14511525 /* Reinit, just in case something touches this. */
1452
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1453
- extack);
1526
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1527
+ rtm_tca_policy, extack);
14541528 if (err < 0)
14551529 return err;
14561530
....@@ -1476,7 +1550,7 @@
14761550 q = dev_ingress_queue(dev)->qdisc_sleeping;
14771551 }
14781552 } else {
1479
- q = dev->qdisc;
1553
+ q = rtnl_dereference(dev->qdisc);
14801554 }
14811555
14821556 /* It may be default qdisc, ignore it */
....@@ -1684,8 +1758,8 @@
16841758 idx = 0;
16851759 ASSERT_RTNL();
16861760
1687
- err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1688
- rtm_tca_policy, NULL);
1761
+ err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1762
+ rtm_tca_policy, cb->extack);
16891763 if (err < 0)
16901764 return err;
16911765
....@@ -1698,7 +1772,8 @@
16981772 s_q_idx = 0;
16991773 q_idx = 0;
17001774
1701
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1775
+ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1776
+ skb, cb, &q_idx, s_q_idx,
17021777 true, tca[TCA_DUMP_INVISIBLE]) < 0)
17031778 goto done;
17041779
....@@ -1778,6 +1853,7 @@
17781853 {
17791854 struct sk_buff *skb;
17801855 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1856
+ int err = 0;
17811857
17821858 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
17831859 if (!skb)
....@@ -1788,8 +1864,11 @@
17881864 return -EINVAL;
17891865 }
17901866
1791
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1792
- n->nlmsg_flags & NLM_F_ECHO);
1867
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1868
+ n->nlmsg_flags & NLM_F_ECHO);
1869
+ if (err > 0)
1870
+ err = 0;
1871
+ return err;
17931872 }
17941873
17951874 static int tclass_del_notify(struct net *net,
....@@ -1820,8 +1899,11 @@
18201899 return err;
18211900 }
18221901
1823
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1824
- n->nlmsg_flags & NLM_F_ECHO);
1902
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1903
+ n->nlmsg_flags & NLM_F_ECHO);
1904
+ if (err > 0)
1905
+ err = 0;
1906
+ return err;
18251907 }
18261908
18271909 #ifdef CONFIG_NET_CLS
....@@ -1847,36 +1929,57 @@
18471929 return 0;
18481930 }
18491931
1932
+struct tc_bind_class_args {
1933
+ struct qdisc_walker w;
1934
+ unsigned long new_cl;
1935
+ u32 portid;
1936
+ u32 clid;
1937
+};
1938
+
1939
+static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1940
+ struct qdisc_walker *w)
1941
+{
1942
+ struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1943
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1944
+ struct tcf_block *block;
1945
+ struct tcf_chain *chain;
1946
+
1947
+ block = cops->tcf_block(q, cl, NULL);
1948
+ if (!block)
1949
+ return 0;
1950
+ for (chain = tcf_get_next_chain(block, NULL);
1951
+ chain;
1952
+ chain = tcf_get_next_chain(block, chain)) {
1953
+ struct tcf_proto *tp;
1954
+
1955
+ for (tp = tcf_get_next_proto(chain, NULL, true);
1956
+ tp; tp = tcf_get_next_proto(chain, tp, true)) {
1957
+ struct tcf_bind_args arg = {};
1958
+
1959
+ arg.w.fn = tcf_node_bind;
1960
+ arg.classid = a->clid;
1961
+ arg.base = cl;
1962
+ arg.cl = a->new_cl;
1963
+ tp->ops->walk(tp, &arg.w, true);
1964
+ }
1965
+ }
1966
+
1967
+ return 0;
1968
+}
1969
+
18501970 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
18511971 unsigned long new_cl)
18521972 {
18531973 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1854
- struct tcf_block *block;
1855
- struct tcf_chain *chain;
1856
- unsigned long cl;
1974
+ struct tc_bind_class_args args = {};
18571975
1858
- cl = cops->find(q, portid);
1859
- if (!cl)
1860
- return;
18611976 if (!cops->tcf_block)
18621977 return;
1863
- block = cops->tcf_block(q, cl, NULL);
1864
- if (!block)
1865
- return;
1866
- list_for_each_entry(chain, &block->chain_list, list) {
1867
- struct tcf_proto *tp;
1868
-
1869
- for (tp = rtnl_dereference(chain->filter_chain);
1870
- tp; tp = rtnl_dereference(tp->next)) {
1871
- struct tcf_bind_args arg = {};
1872
-
1873
- arg.w.fn = tcf_node_bind;
1874
- arg.classid = clid;
1875
- arg.base = cl;
1876
- arg.cl = new_cl;
1877
- tp->ops->walk(tp, &arg.w);
1878
- }
1879
- }
1978
+ args.portid = portid;
1979
+ args.clid = clid;
1980
+ args.new_cl = new_cl;
1981
+ args.w.fn = tc_bind_class_walker;
1982
+ q->ops->cl_ops->walk(q, &args.w);
18801983 }
18811984
18821985 #else
....@@ -1908,8 +2011,8 @@
19082011 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
19092012 return -EPERM;
19102013
1911
- err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1912
- extack);
2014
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2015
+ rtm_tca_policy, extack);
19132016 if (err < 0)
19142017 return err;
19152018
....@@ -1946,7 +2049,7 @@
19462049 } else if (qid1) {
19472050 qid = qid1;
19482051 } else if (qid == 0)
1949
- qid = dev->qdisc->handle;
2052
+ qid = rtnl_dereference(dev->qdisc)->handle;
19502053
19512054 /* Now qid is genuine qdisc handle consistent
19522055 * both with parent and child.
....@@ -1957,7 +2060,7 @@
19572060 portid = TC_H_MAKE(qid, portid);
19582061 } else {
19592062 if (qid == 0)
1960
- qid = dev->qdisc->handle;
2063
+ qid = rtnl_dereference(dev->qdisc)->handle;
19612064 }
19622065
19632066 /* OK. Locate qdisc */
....@@ -2118,7 +2221,8 @@
21182221 s_t = cb->args[0];
21192222 t = 0;
21202223
2121
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
2224
+ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2225
+ skb, tcm, cb, &t, s_t, true) < 0)
21222226 goto done;
21232227
21242228 dev_queue = dev_ingress_queue(dev);