hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/net/ipv4/icmp.c
....@@ -1,12 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * NET3: Implementation of the ICMP protocol layer.
34 *
45 * Alan Cox, <alan@lxorguk.ukuu.org.uk>
5
- *
6
- * This program is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU General Public License
8
- * as published by the Free Software Foundation; either version
9
- * 2 of the License, or (at your option) any later version.
106 *
117 * Some of the function names and the icmp unreach table for this
128 * module were derived from [icmp.c 1.0.11 06/02/93] by
....@@ -59,7 +55,6 @@
5955 *
6056 * - Should use skb_pull() instead of all the manual checking.
6157 * This would also greatly simply some upper layer error handlers. --AK
62
- *
6358 */
6459
6560 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -77,7 +72,6 @@
7772 #include <linux/string.h>
7873 #include <linux/netfilter_ipv4.h>
7974 #include <linux/slab.h>
80
-#include <linux/locallock.h>
8175 #include <net/snmp.h>
8276 #include <net/ip.h>
8377 #include <net/route.h>
....@@ -205,11 +199,9 @@
205199 *
206200 * On SMP we have one ICMP socket per-cpu.
207201 */
208
-static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
209
-
210202 static struct sock *icmp_sk(struct net *net)
211203 {
212
- return *this_cpu_ptr(net->ipv4.icmp_sk);
204
+ return this_cpu_read(*net->ipv4.icmp_sk);
213205 }
214206
215207 /* Called with BH disabled */
....@@ -217,16 +209,12 @@
217209 {
218210 struct sock *sk;
219211
220
- if (!local_trylock(icmp_sk_lock))
221
- return NULL;
222
-
223212 sk = icmp_sk(net);
224213
225214 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
226215 /* This can happen if the output path signals a
227216 * dst_link_failure() for an outgoing ICMP packet.
228217 */
229
- local_unlock(icmp_sk_lock);
230218 return NULL;
231219 }
232220 return sk;
....@@ -235,7 +223,6 @@
235223 static inline void icmp_xmit_unlock(struct sock *sk)
236224 {
237225 spin_unlock(&sk->sk_lock.slock);
238
- local_unlock(icmp_sk_lock);
239226 }
240227
241228 int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
....@@ -274,11 +261,12 @@
274261 spin_lock(&icmp_global.lock);
275262 delta = min_t(u32, now - icmp_global.stamp, HZ);
276263 if (delta >= HZ / 50) {
277
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
264
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
278265 if (incr)
279266 WRITE_ONCE(icmp_global.stamp, now);
280267 }
281
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
268
+ credit = min_t(u32, icmp_global.credit + incr,
269
+ READ_ONCE(sysctl_icmp_msgs_burst));
282270 if (credit) {
283271 /* We want to use a credit of one in average, but need to randomize
284272 * it for security reasons.
....@@ -302,7 +290,7 @@
302290 return true;
303291
304292 /* Limit if icmp type is enabled in ratemask. */
305
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
293
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
306294 return true;
307295
308296 return false;
....@@ -340,7 +328,8 @@
340328
341329 vif = l3mdev_master_ifindex(dst->dev);
342330 peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
343
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
331
+ rc = inet_peer_xrlim_allow(peer,
332
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
344333 if (peer)
345334 inet_putpeer(peer);
346335 out:
....@@ -368,7 +357,7 @@
368357
369358 csum = skb_copy_and_csum_bits(icmp_param->skb,
370359 icmp_param->offset + offset,
371
- to, len, 0);
360
+ to, len);
372361
373362 skb->csum = csum_block_add(skb->csum, csum, odd);
374363 if (icmp_pointers[icmp_param->data.icmph.type].error)
....@@ -392,15 +381,15 @@
392381 ip_flush_pending_frames(sk);
393382 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
394383 struct icmphdr *icmph = icmp_hdr(skb);
395
- __wsum csum = 0;
384
+ __wsum csum;
396385 struct sk_buff *skb1;
397386
387
+ csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
388
+ (char *)icmph,
389
+ icmp_param->head_len);
398390 skb_queue_walk(&sk->sk_write_queue, skb1) {
399391 csum = csum_add(csum, skb1->csum);
400392 }
401
- csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
402
- (char *)icmph,
403
- icmp_param->head_len, csum);
404393 icmph->checksum = csum_fold(csum);
405394 skb->ip_summed = CHECKSUM_NONE;
406395 ip_push_pending_frames(sk, fl4);
....@@ -443,7 +432,7 @@
443432
444433 ipcm_init(&ipc);
445434 inet->tos = ip_hdr(skb)->tos;
446
- sk->sk_mark = mark;
435
+ ipc.sockc.mark = mark;
447436 daddr = ipc.addr = ip_hdr(skb)->saddr;
448437 saddr = fib_compute_spec_dst(skb);
449438
....@@ -460,7 +449,7 @@
460449 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
461450 fl4.flowi4_proto = IPPROTO_ICMP;
462451 fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
463
- security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
452
+ security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
464453 rt = ip_route_output_key(net, &fl4);
465454 if (IS_ERR(rt))
466455 goto out_unlock;
....@@ -516,7 +505,7 @@
516505 route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
517506 fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
518507
519
- security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
508
+ security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4));
520509 rt = ip_route_output_key_hash(net, fl4, skb_in);
521510 if (IS_ERR(rt))
522511 return rt;
....@@ -718,15 +707,16 @@
718707 dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
719708
720709 if (dev)
721
- saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
710
+ saddr = inet_select_addr(dev, iph->saddr,
711
+ RT_SCOPE_LINK);
722712 else
723713 saddr = 0;
724714 rcu_read_unlock();
725715 }
726716
727
- tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
717
+ tos = icmp_pointers[type].error ? (RT_TOS(iph->tos) |
728718 IPTOS_PREC_INTERNETCONTROL) :
729
- iph->tos;
719
+ iph->tos;
730720 mark = IP4_REPLY_MARK(net, skb_in->mark);
731721
732722 if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
....@@ -744,10 +734,10 @@
744734 icmp_param.skb = skb_in;
745735 icmp_param.offset = skb_network_offset(skb_in);
746736 inet_sk(sk)->tos = tos;
747
- sk->sk_mark = mark;
748737 ipcm_init(&ipc);
749738 ipc.addr = iph->saddr;
750739 ipc.opt = &icmp_param.replyopts.opt;
740
+ ipc.sockc.mark = mark;
751741
752742 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
753743 type, code, &icmp_param);
....@@ -765,6 +755,11 @@
765755 room = 576;
766756 room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
767757 room -= sizeof(struct icmphdr);
758
+ /* Guard against tiny mtu. We need to include at least one
759
+ * IP network header for this message to make any sense.
760
+ */
761
+ if (room <= (int)sizeof(struct iphdr))
762
+ goto ende;
768763
769764 icmp_param.data_len = skb_in->len - icmp_param.offset;
770765 if (icmp_param.data_len > room)
....@@ -826,7 +821,7 @@
826821
827822 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
828823 {
829
- const struct iphdr *iph = (const struct iphdr *) skb->data;
824
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
830825 const struct net_protocol *ipprot;
831826 int protocol = iph->protocol;
832827
....@@ -895,9 +890,9 @@
895890 case ICMP_FRAG_NEEDED:
896891 /* for documentation of the ip_no_pmtu_disc
897892 * values please see
898
- * Documentation/networking/ip-sysctl.txt
893
+ * Documentation/networking/ip-sysctl.rst
899894 */
900
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
895
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
901896 default:
902897 net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
903898 &iph->daddr);
....@@ -907,7 +902,7 @@
907902 case 3:
908903 if (!icmp_tag_validation(iph->protocol))
909904 goto out;
910
- /* fall through */
905
+ fallthrough;
911906 case 0:
912907 info = ntohs(icmph->un.frag.mtu);
913908 }
....@@ -985,7 +980,7 @@
985980 return false;
986981 }
987982
988
- icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway);
983
+ icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
989984 return true;
990985 }
991986
....@@ -1158,7 +1153,66 @@
11581153 goto drop;
11591154 }
11601155
1161
-void icmp_err(struct sk_buff *skb, u32 info)
1156
+static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
1157
+{
1158
+ struct icmp_extobj_hdr *objh, _objh;
1159
+ struct icmp_ext_hdr *exth, _exth;
1160
+ u16 olen;
1161
+
1162
+ exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth);
1163
+ if (!exth)
1164
+ return false;
1165
+ if (exth->version != 2)
1166
+ return true;
1167
+
1168
+ if (exth->checksum &&
1169
+ csum_fold(skb_checksum(skb, off, skb->len - off, 0)))
1170
+ return false;
1171
+
1172
+ off += sizeof(_exth);
1173
+ while (off < skb->len) {
1174
+ objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh);
1175
+ if (!objh)
1176
+ return false;
1177
+
1178
+ olen = ntohs(objh->length);
1179
+ if (olen < sizeof(_objh))
1180
+ return false;
1181
+
1182
+ off += olen;
1183
+ if (off > skb->len)
1184
+ return false;
1185
+ }
1186
+
1187
+ return true;
1188
+}
1189
+
1190
+void ip_icmp_error_rfc4884(const struct sk_buff *skb,
1191
+ struct sock_ee_data_rfc4884 *out,
1192
+ int thlen, int off)
1193
+{
1194
+ int hlen;
1195
+
1196
+ /* original datagram headers: end of icmph to payload (skb->data) */
1197
+ hlen = -skb_transport_offset(skb) - thlen;
1198
+
1199
+ /* per rfc 4884: minimal datagram length of 128 bytes */
1200
+ if (off < 128 || off < hlen)
1201
+ return;
1202
+
1203
+ /* kernel has stripped headers: return payload offset in bytes */
1204
+ off -= hlen;
1205
+ if (off + sizeof(struct icmp_ext_hdr) > skb->len)
1206
+ return;
1207
+
1208
+ out->len = off;
1209
+
1210
+ if (!ip_icmp_error_rfc4884_validate(skb, off))
1211
+ out->flags |= SO_EE_RFC4884_FLAG_INVALID;
1212
+}
1213
+EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
1214
+
1215
+int icmp_err(struct sk_buff *skb, u32 info)
11621216 {
11631217 struct iphdr *iph = (struct iphdr *)skb->data;
11641218 int offset = iph->ihl<<2;
....@@ -1173,13 +1227,15 @@
11731227 */
11741228 if (icmph->type != ICMP_ECHOREPLY) {
11751229 ping_err(skb, offset, info);
1176
- return;
1230
+ return 0;
11771231 }
11781232
11791233 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
1180
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0);
1234
+ ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
11811235 else if (type == ICMP_REDIRECT)
1182
- ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0);
1236
+ ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
1237
+
1238
+ return 0;
11831239 }
11841240
11851241 /*
....@@ -1322,9 +1378,7 @@
13221378 return 0;
13231379
13241380 fail:
1325
- for_each_possible_cpu(i)
1326
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
1327
- free_percpu(net->ipv4.icmp_sk);
1381
+ icmp_sk_exit(net);
13281382 return err;
13291383 }
13301384