hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/ipv6/icmp.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Internet Control Message Protocol (ICMPv6)
34 * Linux INET6 implementation
....@@ -8,11 +9,6 @@
89 * Based on net/ipv4/icmp.c
910 *
1011 * RFC 1885
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version
15
- * 2 of the License, or (at your option) any later version.
1612 */
1713
1814 /*
....@@ -79,12 +75,12 @@
7975 *
8076 * On SMP we have one ICMP socket per-cpu.
8177 */
82
-static inline struct sock *icmpv6_sk(struct net *net)
78
+static struct sock *icmpv6_sk(struct net *net)
8379 {
84
- return net->ipv6.icmp_sk[smp_processor_id()];
80
+ return this_cpu_read(*net->ipv6.icmp_sk);
8581 }
8682
87
-static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
83
+static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
8884 u8 type, u8 code, int offset, __be32 info)
8985 {
9086 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
....@@ -100,6 +96,8 @@
10096 if (!(type & ICMPV6_INFOMSG_MASK))
10197 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
10298 ping_err(skb, offset, ntohl(info));
99
+
100
+ return 0;
103101 }
104102
105103 static int icmpv6_rcv(struct sk_buff *skb);
....@@ -160,28 +158,33 @@
160158 tp = skb_header_pointer(skb,
161159 ptr+offsetof(struct icmp6hdr, icmp6_type),
162160 sizeof(_type), &_type);
163
- if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161
+
162
+ /* Based on RFC 8200, Section 4.5 Fragment Header, return
163
+ * false if this is a fragment packet with no icmp header info.
164
+ */
165
+ if (!tp && frag_off != 0)
166
+ return false;
167
+ else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164168 return true;
165169 }
166170 return false;
167171 }
168172
169
-static bool icmpv6_mask_allow(int type)
173
+static bool icmpv6_mask_allow(struct net *net, int type)
170174 {
171
- /* Informational messages are not limited. */
172
- if (type & ICMPV6_INFOMSG_MASK)
175
+ if (type > ICMPV6_MSG_MAX)
173176 return true;
174177
175
- /* Do not limit pmtu discovery, it would break it. */
176
- if (type == ICMPV6_PKT_TOOBIG)
178
+ /* Limit if icmp type is set in ratemask. */
179
+ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
177180 return true;
178181
179182 return false;
180183 }
181184
182
-static bool icmpv6_global_allow(int type)
185
+static bool icmpv6_global_allow(struct net *net, int type)
183186 {
184
- if (icmpv6_mask_allow(type))
187
+ if (icmpv6_mask_allow(net, type))
185188 return true;
186189
187190 if (icmp_global_allow())
....@@ -200,7 +203,7 @@
200203 struct dst_entry *dst;
201204 bool res = false;
202205
203
- if (icmpv6_mask_allow(type))
206
+ if (icmpv6_mask_allow(net, type))
204207 return true;
205208
206209 /*
....@@ -227,6 +230,25 @@
227230 res = inet_peer_xrlim_allow(peer, tmo);
228231 if (peer)
229232 inet_putpeer(peer);
233
+ }
234
+ dst_release(dst);
235
+ return res;
236
+}
237
+
238
+static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
239
+ struct flowi6 *fl6)
240
+{
241
+ struct net *net = sock_net(sk);
242
+ struct dst_entry *dst;
243
+ bool res = false;
244
+
245
+ dst = ip6_route_output(net, sk, fl6);
246
+ if (!dst->error) {
247
+ struct rt6_info *rt = (struct rt6_info *)dst;
248
+ struct in6_addr prefsrc;
249
+
250
+ rt6_get_prefsrc(rt, &prefsrc);
251
+ res = !ipv6_addr_any(&prefsrc);
230252 }
231253 dst_release(dst);
232254 return res;
....@@ -298,10 +320,10 @@
298320 {
299321 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
300322 struct sk_buff *org_skb = msg->skb;
301
- __wsum csum = 0;
323
+ __wsum csum;
302324
303325 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
304
- to, len, csum);
326
+ to, len);
305327 skb->csum = csum_block_add(skb->csum, csum, odd);
306328 if (!(msg->type & ICMPV6_INFOMSG_MASK))
307329 nf_ct_attach(skb, org_skb);
....@@ -395,23 +417,31 @@
395417 return ERR_PTR(err);
396418 }
397419
398
-static int icmp6_iif(const struct sk_buff *skb)
420
+static struct net_device *icmp6_dev(const struct sk_buff *skb)
399421 {
400
- int iif = skb->dev->ifindex;
422
+ struct net_device *dev = skb->dev;
401423
402424 /* for local traffic to local address, skb dev is the loopback
403425 * device. Check if there is a dst attached to the skb and if so
404426 * get the real device index. Same is needed for replies to a link
405427 * local address on a device enslaved to an L3 master device
406428 */
407
- if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
429
+ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
408430 const struct rt6_info *rt6 = skb_rt6_info(skb);
409431
410
- if (rt6)
411
- iif = rt6->rt6i_idev->dev->ifindex;
432
+ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
433
+ * and ip6_null_entry could be set to skb if no route is found.
434
+ */
435
+ if (rt6 && rt6->rt6i_idev)
436
+ dev = rt6->rt6i_idev->dev;
412437 }
413438
414
- return iif;
439
+ return dev;
440
+}
441
+
442
+static int icmp6_iif(const struct sk_buff *skb)
443
+{
444
+ return icmp6_dev(skb)->ifindex;
415445 }
416446
417447 /*
....@@ -480,8 +510,11 @@
480510 if (__ipv6_addr_needs_scope_id(addr_type)) {
481511 iif = icmp6_iif(skb);
482512 } else {
483
- dst = skb_dst(skb);
484
- iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
513
+ /*
514
+ * The source device is used for looking up which routing table
515
+ * to use for sending an ICMP error.
516
+ */
517
+ iif = l3mdev_master_ifindex(skb->dev);
485518 }
486519
487520 /*
....@@ -509,31 +542,42 @@
509542 local_bh_disable();
510543
511544 /* Check global sysctl_icmp_msgs_per_sec ratelimit */
512
- if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
545
+ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
513546 goto out_bh_enable;
514547
515548 mip6_addr_swap(skb, parm);
549
+
550
+ sk = icmpv6_xmit_lock(net);
551
+ if (!sk)
552
+ goto out_bh_enable;
516553
517554 memset(&fl6, 0, sizeof(fl6));
518555 fl6.flowi6_proto = IPPROTO_ICMPV6;
519556 fl6.daddr = hdr->saddr;
520557 if (force_saddr)
521558 saddr = force_saddr;
522
- if (saddr)
559
+ if (saddr) {
523560 fl6.saddr = *saddr;
561
+ } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
562
+ /* select a more meaningful saddr from input if */
563
+ struct net_device *in_netdev;
564
+
565
+ in_netdev = dev_get_by_index(net, parm->iif);
566
+ if (in_netdev) {
567
+ ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
568
+ inet6_sk(sk)->srcprefs,
569
+ &fl6.saddr);
570
+ dev_put(in_netdev);
571
+ }
572
+ }
524573 fl6.flowi6_mark = mark;
525574 fl6.flowi6_oif = iif;
526575 fl6.fl6_icmp_type = type;
527576 fl6.fl6_icmp_code = code;
528577 fl6.flowi6_uid = sock_net_uid(net, NULL);
529578 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
530
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
579
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
531580
532
- sk = icmpv6_xmit_lock(net);
533
- if (!sk)
534
- goto out_bh_enable;
535
-
536
- sk->sk_mark = mark;
537581 np = inet6_sk(sk);
538582
539583 if (!icmpv6_xrlim_allow(sk, type, &fl6))
....@@ -550,6 +594,7 @@
550594 fl6.flowi6_oif = np->ucast_oif;
551595
552596 ipcm6_init_sk(&ipc6, np);
597
+ ipc6.sockc.mark = mark;
553598 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
554599
555600 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
....@@ -682,18 +727,29 @@
682727 struct dst_entry *dst;
683728 struct ipcm6_cookie ipc6;
684729 u32 mark = IP6_REPLY_MARK(net, skb->mark);
730
+ bool acast;
731
+
732
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
733
+ net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
734
+ return;
685735
686736 saddr = &ipv6_hdr(skb)->daddr;
687737
738
+ acast = ipv6_anycast_destination(skb_dst(skb), saddr);
739
+ if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
740
+ return;
741
+
688742 if (!ipv6_unicast_destination(skb) &&
689
- !(net->ipv6.sysctl.anycast_src_echo_reply &&
690
- ipv6_anycast_destination(skb_dst(skb), saddr)))
743
+ !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
691744 saddr = NULL;
692745
693746 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
694747 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
695748
696749 memset(&fl6, 0, sizeof(fl6));
750
+ if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
751
+ fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
752
+
697753 fl6.flowi6_proto = IPPROTO_ICMPV6;
698754 fl6.daddr = ipv6_hdr(skb)->saddr;
699755 if (saddr)
....@@ -702,13 +758,12 @@
702758 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
703759 fl6.flowi6_mark = mark;
704760 fl6.flowi6_uid = sock_net_uid(net, NULL);
705
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
761
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
706762
707763 local_bh_disable();
708764 sk = icmpv6_xmit_lock(net);
709765 if (!sk)
710766 goto out_bh_enable;
711
- sk->sk_mark = mark;
712767 np = inet6_sk(sk);
713768
714769 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
....@@ -722,6 +777,11 @@
722777 if (IS_ERR(dst))
723778 goto out;
724779
780
+ /* Check the ratelimit */
781
+ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
782
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
783
+ goto out_dst_release;
784
+
725785 idev = __in6_dev_get(skb->dev);
726786
727787 msg.skb = skb;
....@@ -731,6 +791,7 @@
731791 ipcm6_init_sk(&ipc6, np);
732792 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
733793 ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
794
+ ipc6.sockc.mark = mark;
734795
735796 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
736797 skb->len + sizeof(struct icmp6hdr),
....@@ -742,6 +803,7 @@
742803 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
743804 skb->len + sizeof(struct icmp6hdr));
744805 }
806
+out_dst_release:
745807 dst_release(dst);
746808 out:
747809 icmpv6_xmit_unlock(sk);
....@@ -800,7 +862,7 @@
800862 static int icmpv6_rcv(struct sk_buff *skb)
801863 {
802864 struct net *net = dev_net(skb->dev);
803
- struct net_device *dev = skb->dev;
865
+ struct net_device *dev = icmp6_dev(skb);
804866 struct inet6_dev *idev = __in6_dev_get(dev);
805867 const struct in6_addr *saddr, *daddr;
806868 struct icmp6hdr *hdr;
....@@ -868,7 +930,7 @@
868930 hdr = icmp6_hdr(skb);
869931
870932 /* to notify */
871
- /* fall through */
933
+ fallthrough;
872934 case ICMPV6_DEST_UNREACH:
873935 case ICMPV6_TIME_EXCEED:
874936 case ICMPV6_PARAMPROB:
....@@ -949,16 +1011,24 @@
9491011 fl6->fl6_icmp_type = type;
9501012 fl6->fl6_icmp_code = 0;
9511013 fl6->flowi6_oif = oif;
952
- security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
1014
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1015
+}
1016
+
1017
+static void __net_exit icmpv6_sk_exit(struct net *net)
1018
+{
1019
+ int i;
1020
+
1021
+ for_each_possible_cpu(i)
1022
+ inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1023
+ free_percpu(net->ipv6.icmp_sk);
9531024 }
9541025
9551026 static int __net_init icmpv6_sk_init(struct net *net)
9561027 {
9571028 struct sock *sk;
958
- int err, i, j;
1029
+ int err, i;
9591030
960
- net->ipv6.icmp_sk =
961
- kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
1031
+ net->ipv6.icmp_sk = alloc_percpu(struct sock *);
9621032 if (!net->ipv6.icmp_sk)
9631033 return -ENOMEM;
9641034
....@@ -971,7 +1041,7 @@
9711041 goto fail;
9721042 }
9731043
974
- net->ipv6.icmp_sk[i] = sk;
1044
+ *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
9751045
9761046 /* Enough space for 2 64K ICMP packets, including
9771047 * sk_buff struct overhead.
....@@ -981,20 +1051,8 @@
9811051 return 0;
9821052
9831053 fail:
984
- for (j = 0; j < i; j++)
985
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
986
- kfree(net->ipv6.icmp_sk);
1054
+ icmpv6_sk_exit(net);
9871055 return err;
988
-}
989
-
990
-static void __net_exit icmpv6_sk_exit(struct net *net)
991
-{
992
- int i;
993
-
994
- for_each_possible_cpu(i) {
995
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
996
- }
997
- kfree(net->ipv6.icmp_sk);
9981056 }
9991057
10001058 static struct pernet_operations icmpv6_sk_ops = {
....@@ -1118,6 +1176,27 @@
11181176 .mode = 0644,
11191177 .proc_handler = proc_dointvec,
11201178 },
1179
+ {
1180
+ .procname = "echo_ignore_multicast",
1181
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1182
+ .maxlen = sizeof(int),
1183
+ .mode = 0644,
1184
+ .proc_handler = proc_dointvec,
1185
+ },
1186
+ {
1187
+ .procname = "echo_ignore_anycast",
1188
+ .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1189
+ .maxlen = sizeof(int),
1190
+ .mode = 0644,
1191
+ .proc_handler = proc_dointvec,
1192
+ },
1193
+ {
1194
+ .procname = "ratemask",
1195
+ .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1196
+ .maxlen = ICMPV6_MSG_MAX + 1,
1197
+ .mode = 0644,
1198
+ .proc_handler = proc_do_large_bitmap,
1199
+ },
11211200 { },
11221201 };
11231202
....@@ -1132,6 +1211,9 @@
11321211 if (table) {
11331212 table[0].data = &net->ipv6.sysctl.icmpv6_time;
11341213 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1214
+ table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1215
+ table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1216
+ table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
11351217 }
11361218 return table;
11371219 }