hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/core/net-sysfs.c
....@@ -1,21 +1,17 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * net-sysfs.c - network device class and attributes
34 *
45 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
5
- *
6
- * This program is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU General Public License
8
- * as published by the Free Software Foundation; either version
9
- * 2 of the License, or (at your option) any later version.
106 */
117
128 #include <linux/capability.h>
139 #include <linux/kernel.h>
1410 #include <linux/netdevice.h>
15
-#include <net/switchdev.h>
1611 #include <linux/if_arp.h>
1712 #include <linux/slab.h>
1813 #include <linux/sched/signal.h>
14
+#include <linux/sched/isolation.h>
1915 #include <linux/nsproxy.h>
2016 #include <net/sock.h>
2117 #include <net/net_namespace.h>
....@@ -85,7 +81,7 @@
8581 struct net_device *netdev = to_net_dev(dev);
8682 struct net *net = dev_net(netdev);
8783 unsigned long new;
88
- int ret = -EINVAL;
84
+ int ret;
8985
9086 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
9187 return -EPERM;
....@@ -179,6 +175,14 @@
179175 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
180176 const char *buf, size_t len)
181177 {
178
+ struct net_device *netdev = to_net_dev(dev);
179
+
180
+ /* The check is also done in change_carrier; this helps returning early
181
+ * without hitting the trylock/restart in netdev_store.
182
+ */
183
+ if (!netdev->netdev_ops->ndo_change_carrier)
184
+ return -EOPNOTSUPP;
185
+
182186 return netdev_store(dev, attr, buf, len, change_carrier);
183187 }
184188
....@@ -200,10 +204,16 @@
200204 struct net_device *netdev = to_net_dev(dev);
201205 int ret = -EINVAL;
202206
207
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
208
+ * returning early without hitting the trylock/restart below.
209
+ */
210
+ if (!netdev->ethtool_ops->get_link_ksettings)
211
+ return ret;
212
+
203213 if (!rtnl_trylock())
204214 return restart_syscall();
205215
206
- if (netif_running(netdev)) {
216
+ if (netif_running(netdev) && netif_device_present(netdev)) {
207217 struct ethtool_link_ksettings cmd;
208218
209219 if (!__ethtool_get_link_ksettings(netdev, &cmd))
....@@ -219,6 +229,12 @@
219229 {
220230 struct net_device *netdev = to_net_dev(dev);
221231 int ret = -EINVAL;
232
+
233
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
234
+ * returning early without hitting the trylock/restart below.
235
+ */
236
+ if (!netdev->ethtool_ops->get_link_ksettings)
237
+ return ret;
222238
223239 if (!rtnl_trylock())
224240 return restart_syscall();
....@@ -248,6 +264,18 @@
248264 }
249265 static DEVICE_ATTR_RO(duplex);
250266
267
+static ssize_t testing_show(struct device *dev,
268
+ struct device_attribute *attr, char *buf)
269
+{
270
+ struct net_device *netdev = to_net_dev(dev);
271
+
272
+ if (netif_running(netdev))
273
+ return sprintf(buf, fmt_dec, !!netif_testing(netdev));
274
+
275
+ return -EINVAL;
276
+}
277
+static DEVICE_ATTR_RO(testing);
278
+
251279 static ssize_t dormant_show(struct device *dev,
252280 struct device_attribute *attr, char *buf)
253281 {
....@@ -265,7 +293,7 @@
265293 "notpresent", /* currently unused */
266294 "down",
267295 "lowerlayerdown",
268
- "testing", /* currently unused */
296
+ "testing",
269297 "dormant",
270298 "up"
271299 };
....@@ -337,7 +365,7 @@
337365
338366 static int change_flags(struct net_device *dev, unsigned long new_flags)
339367 {
340
- return dev_change_flags(dev, (unsigned int)new_flags);
368
+ return dev_change_flags(dev, (unsigned int)new_flags, NULL);
341369 }
342370
343371 static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
....@@ -360,7 +388,7 @@
360388
361389 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
362390 {
363
- dev->gro_flush_timeout = val;
391
+ WRITE_ONCE(dev->gro_flush_timeout, val);
364392 return 0;
365393 }
366394
....@@ -374,6 +402,23 @@
374402 return netdev_store(dev, attr, buf, len, change_gro_flush_timeout);
375403 }
376404 NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
405
+
406
+static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
407
+{
408
+ WRITE_ONCE(dev->napi_defer_hard_irqs, val);
409
+ return 0;
410
+}
411
+
412
+static ssize_t napi_defer_hard_irqs_store(struct device *dev,
413
+ struct device_attribute *attr,
414
+ const char *buf, size_t len)
415
+{
416
+ if (!capable(CAP_NET_ADMIN))
417
+ return -EPERM;
418
+
419
+ return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
420
+}
421
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
377422
378423 static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
379424 const char *buf, size_t len)
....@@ -443,6 +488,14 @@
443488 struct device_attribute *attr,
444489 const char *buf, size_t len)
445490 {
491
+ struct net_device *netdev = to_net_dev(dev);
492
+
493
+ /* The check is also done in change_proto_down; this helps returning
494
+ * early without hitting the trylock/restart in netdev_store.
495
+ */
496
+ if (!netdev->netdev_ops->ndo_change_proto_down)
497
+ return -EOPNOTSUPP;
498
+
446499 return netdev_store(dev, attr, buf, len, change_proto_down);
447500 }
448501 NETDEVICE_SHOW_RW(proto_down, fmt_dec);
....@@ -452,6 +505,12 @@
452505 {
453506 struct net_device *netdev = to_net_dev(dev);
454507 ssize_t ret = -EINVAL;
508
+
509
+ /* The check is also done in dev_get_phys_port_id; this helps returning
510
+ * early without hitting the trylock/restart below.
511
+ */
512
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
513
+ return -EOPNOTSUPP;
455514
456515 if (!rtnl_trylock())
457516 return restart_syscall();
....@@ -475,6 +534,13 @@
475534 struct net_device *netdev = to_net_dev(dev);
476535 ssize_t ret = -EINVAL;
477536
537
+ /* The checks are also done in dev_get_phys_port_name; this helps
538
+ * returning early without hitting the trylock/restart below.
539
+ */
540
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
541
+ !netdev->netdev_ops->ndo_get_devlink_port)
542
+ return -EOPNOTSUPP;
543
+
478544 if (!rtnl_trylock())
479545 return restart_syscall();
480546
....@@ -497,20 +563,23 @@
497563 struct net_device *netdev = to_net_dev(dev);
498564 ssize_t ret = -EINVAL;
499565
566
+ /* The checks are also done in dev_get_port_parent_id; this helps
567
+ * returning early without hitting the trylock/restart below. This works
568
+ * because recurse is false when calling dev_get_port_parent_id.
569
+ */
570
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
571
+ !netdev->netdev_ops->ndo_get_devlink_port)
572
+ return -EOPNOTSUPP;
573
+
500574 if (!rtnl_trylock())
501575 return restart_syscall();
502576
503577 if (dev_isalive(netdev)) {
504
- struct switchdev_attr attr = {
505
- .orig_dev = netdev,
506
- .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
507
- .flags = SWITCHDEV_F_NO_RECURSE,
508
- };
578
+ struct netdev_phys_item_id ppid = { };
509579
510
- ret = switchdev_port_attr_get(netdev, &attr);
580
+ ret = dev_get_port_parent_id(netdev, &ppid, false);
511581 if (!ret)
512
- ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
513
- attr.u.ppid.id);
582
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
514583 }
515584 rtnl_unlock();
516585
....@@ -534,6 +603,7 @@
534603 &dev_attr_speed.attr,
535604 &dev_attr_duplex.attr,
536605 &dev_attr_dormant.attr,
606
+ &dev_attr_testing.attr,
537607 &dev_attr_operstate.attr,
538608 &dev_attr_carrier_changes.attr,
539609 &dev_attr_ifalias.attr,
....@@ -542,6 +612,7 @@
542612 &dev_attr_flags.attr,
543613 &dev_attr_tx_queue_len.attr,
544614 &dev_attr_gro_flush_timeout.attr,
615
+ &dev_attr_napi_defer_hard_irqs.attr,
545616 &dev_attr_phys_port_id.attr,
546617 &dev_attr_phys_port_name.attr,
547618 &dev_attr_phys_switch_id.attr,
....@@ -720,7 +791,7 @@
720791 {
721792 struct rps_map *old_map, *map;
722793 cpumask_var_t mask;
723
- int err, cpu, i;
794
+ int err, cpu, i, hk_flags;
724795 static DEFINE_MUTEX(rps_map_mutex);
725796
726797 if (!capable(CAP_NET_ADMIN))
....@@ -733,6 +804,15 @@
733804 if (err) {
734805 free_cpumask_var(mask);
735806 return err;
807
+ }
808
+
809
+ if (!cpumask_empty(mask)) {
810
+ hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
811
+ cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
812
+ if (cpumask_empty(mask)) {
813
+ free_cpumask_var(mask);
814
+ return -EINVAL;
815
+ }
736816 }
737817
738818 map = kzalloc(max_t(unsigned int,
....@@ -760,9 +840,9 @@
760840 rcu_assign_pointer(queue->rps_map, map);
761841
762842 if (map)
763
- static_key_slow_inc(&rps_needed);
843
+ static_branch_inc(&rps_needed);
764844 if (old_map)
765
- static_key_slow_dec(&rps_needed);
845
+ static_branch_dec(&rps_needed);
766846
767847 mutex_unlock(&rps_map_mutex);
768848
....@@ -869,6 +949,7 @@
869949 #endif
870950 NULL
871951 };
952
+ATTRIBUTE_GROUPS(rx_queue_default);
872953
873954 static void rx_queue_release(struct kobject *kobj)
874955 {
....@@ -917,7 +998,7 @@
917998 static struct kobj_type rx_queue_ktype __ro_after_init = {
918999 .sysfs_ops = &rx_queue_sysfs_ops,
9191000 .release = rx_queue_release,
920
- .default_attrs = rx_queue_default_attrs,
1001
+ .default_groups = rx_queue_default_groups,
9211002 .namespace = rx_queue_namespace,
9221003 .get_ownership = rx_queue_get_ownership,
9231004 };
....@@ -953,6 +1034,24 @@
9531034 kobject_put(kobj);
9541035 return error;
9551036 }
1037
+
1038
+static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
1039
+ kgid_t kgid)
1040
+{
1041
+ struct netdev_rx_queue *queue = dev->_rx + index;
1042
+ struct kobject *kobj = &queue->kobj;
1043
+ int error;
1044
+
1045
+ error = sysfs_change_owner(kobj, kuid, kgid);
1046
+ if (error)
1047
+ return error;
1048
+
1049
+ if (dev->sysfs_rx_queue_group)
1050
+ error = sysfs_group_change_owner(
1051
+ kobj, dev->sysfs_rx_queue_group, kuid, kgid);
1052
+
1053
+ return error;
1054
+}
9561055 #endif /* CONFIG_SYSFS */
9571056
9581057 int
....@@ -982,6 +1081,29 @@
9821081 if (dev->sysfs_rx_queue_group)
9831082 sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
9841083 kobject_put(kobj);
1084
+ }
1085
+
1086
+ return error;
1087
+#else
1088
+ return 0;
1089
+#endif
1090
+}
1091
+
1092
+static int net_rx_queue_change_owner(struct net_device *dev, int num,
1093
+ kuid_t kuid, kgid_t kgid)
1094
+{
1095
+#ifdef CONFIG_SYSFS
1096
+ int error = 0;
1097
+ int i;
1098
+
1099
+#ifndef CONFIG_RPS
1100
+ if (!dev->sysfs_rx_queue_group)
1101
+ return 0;
1102
+#endif
1103
+ for (i = 0; i < num; i++) {
1104
+ error = rx_queue_change_owner(dev, i, kuid, kgid);
1105
+ if (error)
1106
+ break;
9851107 }
9861108
9871109 return error;
....@@ -1085,8 +1207,8 @@
10851207 * belongs to the root device it will be reported with just the
10861208 * traffic class, so just "0" for TC 0 for example.
10871209 */
1088
- return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
1089
- sprintf(buf, "%u\n", tc);
1210
+ return dev->num_tc < 0 ? sprintf(buf, "%d%d\n", tc, dev->num_tc) :
1211
+ sprintf(buf, "%d\n", tc);
10901212 }
10911213
10921214 #ifdef CONFIG_XPS
....@@ -1105,6 +1227,12 @@
11051227
11061228 if (!capable(CAP_NET_ADMIN))
11071229 return -EPERM;
1230
+
1231
+ /* The check is also done later; this helps returning early without
1232
+ * hitting the trylock/restart below.
1233
+ */
1234
+ if (!dev->netdev_ops->ndo_set_tx_maxrate)
1235
+ return -EOPNOTSUPP;
11081236
11091237 err = kstrtou32(buf, 10, &rate);
11101238 if (err < 0)
....@@ -1374,8 +1502,7 @@
13741502 goto err_rtnl_unlock;
13751503 }
13761504 }
1377
- mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
1378
- GFP_KERNEL);
1505
+ mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
13791506 if (!mask) {
13801507 ret = -ENOMEM;
13811508 goto err_rtnl_unlock;
....@@ -1408,7 +1535,7 @@
14081535 rtnl_unlock();
14091536
14101537 len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
1411
- kfree(mask);
1538
+ bitmap_free(mask);
14121539
14131540 return len < PAGE_SIZE ? len : -EINVAL;
14141541
....@@ -1428,8 +1555,7 @@
14281555 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
14291556 return -EPERM;
14301557
1431
- mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
1432
- GFP_KERNEL);
1558
+ mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
14331559 if (!mask)
14341560 return -ENOMEM;
14351561
....@@ -1437,7 +1563,7 @@
14371563
14381564 err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
14391565 if (err) {
1440
- kfree(mask);
1566
+ bitmap_free(mask);
14411567 return err;
14421568 }
14431569
....@@ -1452,7 +1578,7 @@
14521578
14531579 rtnl_unlock();
14541580
1455
- kfree(mask);
1581
+ bitmap_free(mask);
14561582 return err ? : len;
14571583 }
14581584
....@@ -1470,6 +1596,7 @@
14701596 #endif
14711597 NULL
14721598 };
1599
+ATTRIBUTE_GROUPS(netdev_queue_default);
14731600
14741601 static void netdev_queue_release(struct kobject *kobj)
14751602 {
....@@ -1502,7 +1629,7 @@
15021629 static struct kobj_type netdev_queue_ktype __ro_after_init = {
15031630 .sysfs_ops = &netdev_queue_sysfs_ops,
15041631 .release = netdev_queue_release,
1505
- .default_attrs = netdev_queue_default_attrs,
1632
+ .default_groups = netdev_queue_default_groups,
15061633 .namespace = netdev_queue_namespace,
15071634 .get_ownership = netdev_queue_get_ownership,
15081635 };
....@@ -1537,6 +1664,23 @@
15371664 kobject_put(kobj);
15381665 return error;
15391666 }
1667
+
1668
+static int tx_queue_change_owner(struct net_device *ndev, int index,
1669
+ kuid_t kuid, kgid_t kgid)
1670
+{
1671
+ struct netdev_queue *queue = ndev->_tx + index;
1672
+ struct kobject *kobj = &queue->kobj;
1673
+ int error;
1674
+
1675
+ error = sysfs_change_owner(kobj, kuid, kgid);
1676
+ if (error)
1677
+ return error;
1678
+
1679
+#ifdef CONFIG_BQL
1680
+ error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
1681
+#endif
1682
+ return error;
1683
+}
15401684 #endif /* CONFIG_SYSFS */
15411685
15421686 int
....@@ -1563,6 +1707,25 @@
15631707 sysfs_remove_group(&queue->kobj, &dql_group);
15641708 #endif
15651709 kobject_put(&queue->kobj);
1710
+ }
1711
+
1712
+ return error;
1713
+#else
1714
+ return 0;
1715
+#endif /* CONFIG_SYSFS */
1716
+}
1717
+
1718
+static int net_tx_queue_change_owner(struct net_device *dev, int num,
1719
+ kuid_t kuid, kgid_t kgid)
1720
+{
1721
+#ifdef CONFIG_SYSFS
1722
+ int error = 0;
1723
+ int i;
1724
+
1725
+ for (i = 0; i < num; i++) {
1726
+ error = tx_queue_change_owner(dev, i, kuid, kgid);
1727
+ if (error)
1728
+ break;
15661729 }
15671730
15681731 return error;
....@@ -1603,6 +1766,31 @@
16031766 kset_unregister(dev->queues_kset);
16041767 #endif
16051768 return error;
1769
+}
1770
+
1771
+static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
1772
+{
1773
+ int error = 0, real_rx = 0, real_tx = 0;
1774
+
1775
+#ifdef CONFIG_SYSFS
1776
+ if (ndev->queues_kset) {
1777
+ error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
1778
+ if (error)
1779
+ return error;
1780
+ }
1781
+ real_rx = ndev->real_num_rx_queues;
1782
+#endif
1783
+ real_tx = ndev->real_num_tx_queues;
1784
+
1785
+ error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
1786
+ if (error)
1787
+ return error;
1788
+
1789
+ error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
1790
+ if (error)
1791
+ return error;
1792
+
1793
+ return 0;
16061794 }
16071795
16081796 static void remove_queue_kobjects(struct net_device *dev)
....@@ -1726,12 +1914,12 @@
17261914 #ifdef CONFIG_OF_NET
17271915 static int of_dev_node_match(struct device *dev, const void *data)
17281916 {
1729
- int ret = 0;
1917
+ for (; dev; dev = dev->parent) {
1918
+ if (dev->of_node == data)
1919
+ return 1;
1920
+ }
17301921
1731
- if (dev->parent)
1732
- ret = dev->parent->of_node == data;
1733
-
1734
- return ret == 0 ? dev->of_node == data : ret;
1922
+ return 0;
17351923 }
17361924
17371925 /*
....@@ -1821,6 +2009,37 @@
18212009 return error;
18222010 }
18232011
2012
+/* Change owner for sysfs entries when moving network devices across network
2013
+ * namespaces owned by different user namespaces.
2014
+ */
2015
+int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
2016
+ const struct net *net_new)
2017
+{
2018
+ kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
2019
+ kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
2020
+ struct device *dev = &ndev->dev;
2021
+ int error;
2022
+
2023
+ net_ns_get_ownership(net_old, &old_uid, &old_gid);
2024
+ net_ns_get_ownership(net_new, &new_uid, &new_gid);
2025
+
2026
+ /* The network namespace was changed but the owning user namespace is
2027
+ * identical so there's no need to change the owner of sysfs entries.
2028
+ */
2029
+ if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
2030
+ return 0;
2031
+
2032
+ error = device_change_owner(dev, new_uid, new_gid);
2033
+ if (error)
2034
+ return error;
2035
+
2036
+ error = queue_change_owner(ndev, new_uid, new_gid);
2037
+ if (error)
2038
+ return error;
2039
+
2040
+ return 0;
2041
+}
2042
+
18242043 int netdev_class_create_file_ns(const struct class_attribute *class_attr,
18252044 const void *ns)
18262045 {