hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/net/ipv6/icmp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Internet Control Message Protocol (ICMPv6)
34 * Linux INET6 implementation
....@@ -8,11 +9,6 @@
89 * Based on net/ipv4/icmp.c
910 *
1011 * RFC 1885
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version
15
- * 2 of the License, or (at your option) any later version.
1612 */
1713
1814 /*
....@@ -79,12 +75,12 @@
7975 *
8076 * On SMP we have one ICMP socket per-cpu.
8177 */
82
-static inline struct sock *icmpv6_sk(struct net *net)
78
+static struct sock *icmpv6_sk(struct net *net)
8379 {
84
- return net->ipv6.icmp_sk[smp_processor_id()];
80
+ return this_cpu_read(*net->ipv6.icmp_sk);
8581 }
8682
87
-static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
83
+static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
8884 u8 type, u8 code, int offset, __be32 info)
8985 {
9086 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
....@@ -100,6 +96,8 @@
10096 if (!(type & ICMPV6_INFOMSG_MASK))
10197 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
10298 ping_err(skb, offset, ntohl(info));
99
+
100
+ return 0;
103101 }
104102
105103 static int icmpv6_rcv(struct sk_buff *skb);
....@@ -160,28 +158,33 @@
160158 tp = skb_header_pointer(skb,
161159 ptr+offsetof(struct icmp6hdr, icmp6_type),
162160 sizeof(_type), &_type);
163
- if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161
+
162
+ /* Based on RFC 8200, Section 4.5 Fragment Header, return
163
+ * false if this is a fragment packet with no icmp header info.
164
+ */
165
+ if (!tp && frag_off != 0)
166
+ return false;
167
+ else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164168 return true;
165169 }
166170 return false;
167171 }
168172
169
-static bool icmpv6_mask_allow(int type)
173
+static bool icmpv6_mask_allow(struct net *net, int type)
170174 {
171
- /* Informational messages are not limited. */
172
- if (type & ICMPV6_INFOMSG_MASK)
175
+ if (type > ICMPV6_MSG_MAX)
173176 return true;
174177
175
- /* Do not limit pmtu discovery, it would break it. */
176
- if (type == ICMPV6_PKT_TOOBIG)
178
+ /* Limit if icmp type is set in ratemask. */
179
+ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
177180 return true;
178181
179182 return false;
180183 }
181184
182
-static bool icmpv6_global_allow(int type)
185
+static bool icmpv6_global_allow(struct net *net, int type)
183186 {
184
- if (icmpv6_mask_allow(type))
187
+ if (icmpv6_mask_allow(net, type))
185188 return true;
186189
187190 if (icmp_global_allow())
....@@ -200,7 +203,7 @@
200203 struct dst_entry *dst;
201204 bool res = false;
202205
203
- if (icmpv6_mask_allow(type))
206
+ if (icmpv6_mask_allow(net, type))
204207 return true;
205208
206209 /*
....@@ -227,6 +230,25 @@
227230 res = inet_peer_xrlim_allow(peer, tmo);
228231 if (peer)
229232 inet_putpeer(peer);
233
+ }
234
+ dst_release(dst);
235
+ return res;
236
+}
237
+
238
+static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
239
+ struct flowi6 *fl6)
240
+{
241
+ struct net *net = sock_net(sk);
242
+ struct dst_entry *dst;
243
+ bool res = false;
244
+
245
+ dst = ip6_route_output(net, sk, fl6);
246
+ if (!dst->error) {
247
+ struct rt6_info *rt = (struct rt6_info *)dst;
248
+ struct in6_addr prefsrc;
249
+
250
+ rt6_get_prefsrc(rt, &prefsrc);
251
+ res = !ipv6_addr_any(&prefsrc);
230252 }
231253 dst_release(dst);
232254 return res;
....@@ -298,10 +320,10 @@
298320 {
299321 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
300322 struct sk_buff *org_skb = msg->skb;
301
- __wsum csum = 0;
323
+ __wsum csum;
302324
303325 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
304
- to, len, csum);
326
+ to, len);
305327 skb->csum = csum_block_add(skb->csum, csum, odd);
306328 if (!(msg->type & ICMPV6_INFOMSG_MASK))
307329 nf_ct_attach(skb, org_skb);
....@@ -395,23 +417,28 @@
395417 return ERR_PTR(err);
396418 }
397419
398
-static int icmp6_iif(const struct sk_buff *skb)
420
+static struct net_device *icmp6_dev(const struct sk_buff *skb)
399421 {
400
- int iif = skb->dev->ifindex;
422
+ struct net_device *dev = skb->dev;
401423
402424 /* for local traffic to local address, skb dev is the loopback
403425 * device. Check if there is a dst attached to the skb and if so
404426 * get the real device index. Same is needed for replies to a link
405427 * local address on a device enslaved to an L3 master device
406428 */
407
- if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
429
+ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
408430 const struct rt6_info *rt6 = skb_rt6_info(skb);
409431
410432 if (rt6)
411
- iif = rt6->rt6i_idev->dev->ifindex;
433
+ dev = rt6->rt6i_idev->dev;
412434 }
413435
414
- return iif;
436
+ return dev;
437
+}
438
+
439
+static int icmp6_iif(const struct sk_buff *skb)
440
+{
441
+ return icmp6_dev(skb)->ifindex;
415442 }
416443
417444 /*
....@@ -480,8 +507,11 @@
480507 if (__ipv6_addr_needs_scope_id(addr_type)) {
481508 iif = icmp6_iif(skb);
482509 } else {
483
- dst = skb_dst(skb);
484
- iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
510
+ /*
511
+ * The source device is used for looking up which routing table
512
+ * to use for sending an ICMP error.
513
+ */
514
+ iif = l3mdev_master_ifindex(skb->dev);
485515 }
486516
487517 /*
....@@ -509,31 +539,42 @@
509539 local_bh_disable();
510540
511541 /* Check global sysctl_icmp_msgs_per_sec ratelimit */
512
- if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
542
+ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
513543 goto out_bh_enable;
514544
515545 mip6_addr_swap(skb, parm);
546
+
547
+ sk = icmpv6_xmit_lock(net);
548
+ if (!sk)
549
+ goto out_bh_enable;
516550
517551 memset(&fl6, 0, sizeof(fl6));
518552 fl6.flowi6_proto = IPPROTO_ICMPV6;
519553 fl6.daddr = hdr->saddr;
520554 if (force_saddr)
521555 saddr = force_saddr;
522
- if (saddr)
556
+ if (saddr) {
523557 fl6.saddr = *saddr;
558
+ } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
559
+ /* select a more meaningful saddr from input if */
560
+ struct net_device *in_netdev;
561
+
562
+ in_netdev = dev_get_by_index(net, parm->iif);
563
+ if (in_netdev) {
564
+ ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
565
+ inet6_sk(sk)->srcprefs,
566
+ &fl6.saddr);
567
+ dev_put(in_netdev);
568
+ }
569
+ }
524570 fl6.flowi6_mark = mark;
525571 fl6.flowi6_oif = iif;
526572 fl6.fl6_icmp_type = type;
527573 fl6.fl6_icmp_code = code;
528574 fl6.flowi6_uid = sock_net_uid(net, NULL);
529575 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
530
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
576
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
531577
532
- sk = icmpv6_xmit_lock(net);
533
- if (!sk)
534
- goto out_bh_enable;
535
-
536
- sk->sk_mark = mark;
537578 np = inet6_sk(sk);
538579
539580 if (!icmpv6_xrlim_allow(sk, type, &fl6))
....@@ -550,6 +591,7 @@
550591 fl6.flowi6_oif = np->ucast_oif;
551592
552593 ipcm6_init_sk(&ipc6, np);
594
+ ipc6.sockc.mark = mark;
553595 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
554596
555597 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
....@@ -682,18 +724,29 @@
682724 struct dst_entry *dst;
683725 struct ipcm6_cookie ipc6;
684726 u32 mark = IP6_REPLY_MARK(net, skb->mark);
727
+ bool acast;
728
+
729
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
730
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
731
+ return;
685732
686733 saddr = &ipv6_hdr(skb)->daddr;
687734
735
+ acast = ipv6_anycast_destination(skb_dst(skb), saddr);
736
+ if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
737
+ return;
738
+
688739 if (!ipv6_unicast_destination(skb) &&
689
- !(net->ipv6.sysctl.anycast_src_echo_reply &&
690
- ipv6_anycast_destination(skb_dst(skb), saddr)))
740
+ !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
691741 saddr = NULL;
692742
693743 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
694744 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
695745
696746 memset(&fl6, 0, sizeof(fl6));
747
+ if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
748
+ fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
749
+
697750 fl6.flowi6_proto = IPPROTO_ICMPV6;
698751 fl6.daddr = ipv6_hdr(skb)->saddr;
699752 if (saddr)
....@@ -702,13 +755,12 @@
702755 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
703756 fl6.flowi6_mark = mark;
704757 fl6.flowi6_uid = sock_net_uid(net, NULL);
705
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
758
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
706759
707760 local_bh_disable();
708761 sk = icmpv6_xmit_lock(net);
709762 if (!sk)
710763 goto out_bh_enable;
711
- sk->sk_mark = mark;
712764 np = inet6_sk(sk);
713765
714766 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
....@@ -722,6 +774,11 @@
722774 if (IS_ERR(dst))
723775 goto out;
724776
777
+ /* Check the ratelimit */
778
+ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
779
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
780
+ goto out_dst_release;
781
+
725782 idev = __in6_dev_get(skb->dev);
726783
727784 msg.skb = skb;
....@@ -731,6 +788,7 @@
731788 ipcm6_init_sk(&ipc6, np);
732789 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
733790 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
791
+ ipc6.sockc.mark = mark;
734792
735793 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
736794 skb->len + sizeof(struct icmp6hdr),
....@@ -742,6 +800,7 @@
742800 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
743801 skb->len + sizeof(struct icmp6hdr));
744802 }
803
+out_dst_release:
745804 dst_release(dst);
746805 out:
747806 icmpv6_xmit_unlock(sk);
....@@ -800,7 +859,7 @@
800859 static int icmpv6_rcv(struct sk_buff *skb)
801860 {
802861 struct net *net = dev_net(skb->dev);
803
- struct net_device *dev = skb->dev;
862
+ struct net_device *dev = icmp6_dev(skb);
804863 struct inet6_dev *idev = __in6_dev_get(dev);
805864 const struct in6_addr *saddr, *daddr;
806865 struct icmp6hdr *hdr;
....@@ -868,7 +927,7 @@
868927 hdr = icmp6_hdr(skb);
869928
870929 /* to notify */
871
- /* fall through */
930
+ fallthrough;
872931 case ICMPV6_DEST_UNREACH:
873932 case ICMPV6_TIME_EXCEED:
874933 case ICMPV6_PARAMPROB:
....@@ -949,16 +1008,24 @@
9491008 fl6->fl6_icmp_type = type;
9501009 fl6->fl6_icmp_code = 0;
9511010 fl6->flowi6_oif = oif;
952
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
1011
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1012
+}
1013
+
1014
+static void __net_exit icmpv6_sk_exit(struct net *net)
1015
+{
1016
+ int i;
1017
+
1018
+ for_each_possible_cpu(i)
1019
+ inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1020
+ free_percpu(net->ipv6.icmp_sk);
9531021 }
9541022
9551023 static int __net_init icmpv6_sk_init(struct net *net)
9561024 {
9571025 struct sock *sk;
958
- int err, i, j;
1026
+ int err, i;
9591027
960
- net->ipv6.icmp_sk =
961
- kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
1028
+ net->ipv6.icmp_sk = alloc_percpu(struct sock *);
9621029 if (!net->ipv6.icmp_sk)
9631030 return -ENOMEM;
9641031
....@@ -971,7 +1038,7 @@
9711038 goto fail;
9721039 }
9731040
974
- net->ipv6.icmp_sk[i] = sk;
1041
+ *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
9751042
9761043 /* Enough space for 2 64K ICMP packets, including
9771044 * sk_buff struct overhead.
....@@ -981,20 +1048,8 @@
9811048 return 0;
9821049
9831050 fail:
984
- for (j = 0; j < i; j++)
985
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
986
- kfree(net->ipv6.icmp_sk);
1051
+ icmpv6_sk_exit(net);
9871052 return err;
988
-}
989
-
990
-static void __net_exit icmpv6_sk_exit(struct net *net)
991
-{
992
- int i;
993
-
994
- for_each_possible_cpu(i) {
995
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
996
- }
997
- kfree(net->ipv6.icmp_sk);
9981053 }
9991054
10001055 static struct pernet_operations icmpv6_sk_ops = {
....@@ -1118,6 +1173,27 @@
11181173 .mode = 0644,
11191174 .proc_handler = proc_dointvec,
11201175 },
1176
+ {
1177
+ .procname = "echo_ignore_multicast",
1178
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1179
+ .maxlen = sizeof(int),
1180
+ .mode = 0644,
1181
+ .proc_handler = proc_dointvec,
1182
+ },
1183
+ {
1184
+ .procname = "echo_ignore_anycast",
1185
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1186
+ .maxlen = sizeof(int),
1187
+ .mode = 0644,
1188
+ .proc_handler = proc_dointvec,
1189
+ },
1190
+ {
1191
+ .procname = "ratemask",
1192
+ .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1193
+ .maxlen = ICMPV6_MSG_MAX + 1,
1194
+ .mode = 0644,
1195
+ .proc_handler = proc_do_large_bitmap,
1196
+ },
11211197 { },
11221198 };
11231199
....@@ -1132,6 +1208,9 @@
11321208 if (table) {
11331209 table[0].data = &net->ipv6.sysctl.icmpv6_time;
11341210 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1211
+ table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1212
+ table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1213
+ table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
11351214 }
11361215 return table;
11371216 }