| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
---|
3 | 4 | * operating system. INET is implemented using the BSD Socket |
---|
.. | .. |
---|
43 | 44 | * Chetan Loke : Implemented TPACKET_V3 block abstraction |
---|
44 | 45 | * layer. |
---|
45 | 46 | * Copyright (C) 2011, <lokec@ccs.neu.edu> |
---|
46 | | - * |
---|
47 | | - * |
---|
48 | | - * This program is free software; you can redistribute it and/or |
---|
49 | | - * modify it under the terms of the GNU General Public License |
---|
50 | | - * as published by the Free Software Foundation; either version |
---|
51 | | - * 2 of the License, or (at your option) any later version. |
---|
52 | | - * |
---|
53 | 47 | */ |
---|
54 | 48 | |
---|
55 | 49 | #include <linux/types.h> |
---|
.. | .. |
---|
99 | 93 | |
---|
100 | 94 | /* |
---|
101 | 95 | Assumptions: |
---|
102 | | - - if device has no dev->hard_header routine, it adds and removes ll header |
---|
103 | | - inside itself. In this case ll header is invisible outside of device, |
---|
104 | | - but higher levels still should reserve dev->hard_header_len. |
---|
105 | | - Some devices are enough clever to reallocate skb, when header |
---|
106 | | - will not fit to reserved space (tunnel), another ones are silly |
---|
107 | | - (PPP). |
---|
| 96 | + - If the device has no dev->header_ops->create, there is no LL header |
---|
| 97 | + visible above the device. In this case, its hard_header_len should be 0. |
---|
| 98 | + The device may prepend its own header internally. In this case, its |
---|
| 99 | + needed_headroom should be set to the space needed for it to add its |
---|
| 100 | + internal header. |
---|
| 101 | + For example, a WiFi driver pretending to be an Ethernet driver should |
---|
| 102 | + set its hard_header_len to be the Ethernet header length, and set its |
---|
| 103 | + needed_headroom to be (the real WiFi header length - the fake Ethernet |
---|
| 104 | + header length). |
---|
108 | 105 | - packet socket receives packets with pulled ll header, |
---|
109 | 106 | so that SOCK_RAW should push it back. |
---|
110 | 107 | |
---|
111 | 108 | On receive: |
---|
112 | 109 | ----------- |
---|
113 | 110 | |
---|
114 | | -Incoming, dev->hard_header!=NULL |
---|
| 111 | +Incoming, dev_has_header(dev) == true |
---|
115 | 112 | mac_header -> ll header |
---|
116 | 113 | data -> data |
---|
117 | 114 | |
---|
118 | | -Outgoing, dev->hard_header!=NULL |
---|
| 115 | +Outgoing, dev_has_header(dev) == true |
---|
119 | 116 | mac_header -> ll header |
---|
120 | 117 | data -> ll header |
---|
121 | 118 | |
---|
122 | | -Incoming, dev->hard_header==NULL |
---|
123 | | - mac_header -> UNKNOWN position. It is very likely, that it points to ll |
---|
124 | | - header. PPP makes it, that is wrong, because introduce |
---|
125 | | - assymetry between rx and tx paths. |
---|
| 119 | +Incoming, dev_has_header(dev) == false |
---|
| 120 | + mac_header -> data |
---|
| 121 | + However drivers often make it point to the ll header. |
---|
| 122 | + This is incorrect because the ll header should be invisible to us. |
---|
126 | 123 | data -> data |
---|
127 | 124 | |
---|
128 | | -Outgoing, dev->hard_header==NULL |
---|
129 | | - mac_header -> data. ll header is still not built! |
---|
| 125 | +Outgoing, dev_has_header(dev) == false |
---|
| 126 | + mac_header -> data. ll header is invisible to us. |
---|
130 | 127 | data -> data |
---|
131 | 128 | |
---|
132 | 129 | Resume |
---|
133 | | - If dev->hard_header==NULL we are unlikely to restore sensible ll header. |
---|
| 130 | + If dev_has_header(dev) == false we are unable to restore the ll header, |
---|
| 131 | + because it is invisible to us. |
---|
134 | 132 | |
---|
135 | 133 | |
---|
136 | 134 | On transmit: |
---|
137 | 135 | ------------ |
---|
138 | 136 | |
---|
139 | | -dev->hard_header != NULL |
---|
| 137 | +dev->header_ops != NULL |
---|
140 | 138 | mac_header -> ll header |
---|
141 | 139 | data -> ll header |
---|
142 | 140 | |
---|
143 | | -dev->hard_header == NULL (ll header is added by device, we cannot control it) |
---|
| 141 | +dev->header_ops == NULL (ll header is invisible to us) |
---|
144 | 142 | mac_header -> data |
---|
145 | 143 | data -> data |
---|
146 | 144 | |
---|
147 | | - We should set nh.raw on output to correct posistion, |
---|
| 145 | + We should set network_header on output to the correct position, |
---|
148 | 146 | packet classifier depends on it. |
---|
149 | 147 | */ |
---|
150 | 148 | |
---|
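The rewritten comment above keys everything on dev_has_header(). For reference, a minimal sketch of the check that helper is assumed to perform (it is not part of this diff; the real definition lives in <linux/netdevice.h>):

```c
/* Sketch only: a device is considered to have a link-layer header that is
 * visible above it when its header_ops can actually build one.
 */
static inline bool dev_has_header(const struct net_device *dev)
{
	return dev->header_ops && dev->header_ops->create;
}
```

Note that the transmit-side notes in the comment still test dev->header_ops != NULL rather than dev_has_header().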
.. | .. |
---|
183 | 181 | #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) |
---|
184 | 182 | #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) |
---|
185 | 183 | #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) |
---|
186 | | -#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) |
---|
187 | 184 | |
---|
188 | 185 | struct packet_sock; |
---|
189 | 186 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, |
---|
.. | .. |
---|
275 | 272 | return po->xmit == packet_direct_xmit; |
---|
276 | 273 | } |
---|
277 | 274 | |
---|
278 | | -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, |
---|
279 | | - struct net_device *sb_dev) |
---|
280 | | -{ |
---|
281 | | - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); |
---|
282 | | -} |
---|
283 | | - |
---|
284 | 275 | static u16 packet_pick_tx_queue(struct sk_buff *skb) |
---|
285 | 276 | { |
---|
286 | 277 | struct net_device *dev = skb->dev; |
---|
287 | 278 | const struct net_device_ops *ops = dev->netdev_ops; |
---|
| 279 | + int cpu = raw_smp_processor_id(); |
---|
288 | 280 | u16 queue_index; |
---|
289 | 281 | |
---|
| 282 | +#ifdef CONFIG_XPS |
---|
| 283 | + skb->sender_cpu = cpu + 1; |
---|
| 284 | +#endif |
---|
| 285 | + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); |
---|
290 | 286 | if (ops->ndo_select_queue) { |
---|
291 | | - queue_index = ops->ndo_select_queue(dev, skb, NULL, |
---|
292 | | - __packet_pick_tx_queue); |
---|
| 287 | + queue_index = ops->ndo_select_queue(dev, skb, NULL); |
---|
293 | 288 | queue_index = netdev_cap_txqueue(dev, queue_index); |
---|
294 | 289 | } else { |
---|
295 | | - queue_index = __packet_pick_tx_queue(dev, skb, NULL); |
---|
| 290 | + queue_index = netdev_pick_tx(dev, skb, NULL); |
---|
296 | 291 | } |
---|
297 | 292 | |
---|
298 | 293 | return queue_index; |
---|
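For context on the fallback path: the removed __packet_pick_tx_queue() wrapper deferred to dev_pick_tx_cpu_id(), whose presumed body (illustrative sketch, not part of this diff) is just a CPU-to-queue mapping:

```c
/* Assumed behaviour of the old helper: spread sockets across tx queues
 * purely by the CPU the sender happens to run on.
 */
static u16 pick_tx_queue_by_cpu(struct net_device *dev)
{
	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
}
```

The new code preserves that spreading only as a hint: it records cpu % dev->real_num_tx_queues as the rx queue before handing selection to the common netdev_pick_tx(), which may use a recorded queue when it has nothing better to go on (an assumption about netdev_pick_tx() internals).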
.. | .. |
---|
392 | 387 | smp_wmb(); |
---|
393 | 388 | } |
---|
394 | 389 | |
---|
395 | | -static int __packet_get_status(struct packet_sock *po, void *frame) |
---|
| 390 | +static int __packet_get_status(const struct packet_sock *po, void *frame) |
---|
396 | 391 | { |
---|
397 | 392 | union tpacket_uhdr h; |
---|
398 | 393 | |
---|
.. | .. |
---|
416 | 411 | } |
---|
417 | 412 | } |
---|
418 | 413 | |
---|
419 | | -static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, |
---|
| 414 | +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, |
---|
420 | 415 | unsigned int flags) |
---|
421 | 416 | { |
---|
422 | 417 | struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); |
---|
423 | 418 | |
---|
424 | 419 | if (shhwtstamps && |
---|
425 | 420 | (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && |
---|
426 | | - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) |
---|
| 421 | + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) |
---|
427 | 422 | return TP_STATUS_TS_RAW_HARDWARE; |
---|
428 | 423 | |
---|
429 | | - if (ktime_to_timespec_cond(skb->tstamp, ts)) |
---|
| 424 | + if ((flags & SOF_TIMESTAMPING_SOFTWARE) && |
---|
| 425 | + ktime_to_timespec64_cond(skb->tstamp, ts)) |
---|
430 | 426 | return TP_STATUS_TS_SOFTWARE; |
---|
431 | 427 | |
---|
432 | 428 | return 0; |
---|
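tpacket_get_timestamp() is driven by the socket's PACKET_TIMESTAMP flags (po->tp_tstamp). A minimal userspace sketch of requesting both timestamp classes (assumption: fd is an existing AF_PACKET ring socket):

```c
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/net_tstamp.h>

static int request_ring_timestamps(int fd)
{
	int req = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE;

	/* tp_status of each ring frame then carries TP_STATUS_TS_SOFTWARE
	 * or TP_STATUS_TS_RAW_HARDWARE, matching the return values above. */
	return setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, &req, sizeof(req));
}
```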
.. | .. |
---|
436 | 432 | struct sk_buff *skb) |
---|
437 | 433 | { |
---|
438 | 434 | union tpacket_uhdr h; |
---|
439 | | - struct timespec ts; |
---|
| 435 | + struct timespec64 ts; |
---|
440 | 436 | __u32 ts_status; |
---|
441 | 437 | |
---|
442 | 438 | if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
---|
443 | 439 | return 0; |
---|
444 | 440 | |
---|
445 | 441 | h.raw = frame; |
---|
| 442 | + /* |
---|
| 443 | + * versions 1 through 3 overflow the timestamps in y2106, since they |
---|
| 444 | + * all store the seconds in a 32-bit unsigned integer. |
---|
| 445 | + * If we create a version 4, that should have a 64-bit timestamp, |
---|
| 446 | + * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit |
---|
| 447 | + * nanoseconds. |
---|
| 448 | + */ |
---|
446 | 449 | switch (po->tp_version) { |
---|
447 | 450 | case TPACKET_V1: |
---|
448 | 451 | h.h1->tp_sec = ts.tv_sec; |
---|
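To make the y2106 figure in the comment above concrete: a 32-bit unsigned seconds field wraps after 2^32 s ≈ 4.29 x 10^9 s ≈ 136 years, and 1970 + 136 lands in 2106.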
.. | .. |
---|
468 | 471 | return ts_status; |
---|
469 | 472 | } |
---|
470 | 473 | |
---|
471 | | -static void *packet_lookup_frame(struct packet_sock *po, |
---|
472 | | - struct packet_ring_buffer *rb, |
---|
473 | | - unsigned int position, |
---|
474 | | - int status) |
---|
| 474 | +static void *packet_lookup_frame(const struct packet_sock *po, |
---|
| 475 | + const struct packet_ring_buffer *rb, |
---|
| 476 | + unsigned int position, |
---|
| 477 | + int status) |
---|
475 | 478 | { |
---|
476 | 479 | unsigned int pg_vec_pos, frame_offset; |
---|
477 | 480 | union tpacket_uhdr h; |
---|
.. | .. |
---|
528 | 531 | int blk_size_in_bytes) |
---|
529 | 532 | { |
---|
530 | 533 | struct net_device *dev; |
---|
531 | | - unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; |
---|
| 534 | + unsigned int mbits, div; |
---|
532 | 535 | struct ethtool_link_ksettings ecmd; |
---|
533 | 536 | int err; |
---|
534 | 537 | |
---|
.. | .. |
---|
540 | 543 | } |
---|
541 | 544 | err = __ethtool_get_link_ksettings(dev, &ecmd); |
---|
542 | 545 | rtnl_unlock(); |
---|
543 | | - if (!err) { |
---|
544 | | - /* |
---|
545 | | - * If the link speed is so slow you don't really |
---|
546 | | - * need to worry about perf anyways |
---|
547 | | - */ |
---|
548 | | - if (ecmd.base.speed < SPEED_1000 || |
---|
549 | | - ecmd.base.speed == SPEED_UNKNOWN) { |
---|
550 | | - return DEFAULT_PRB_RETIRE_TOV; |
---|
551 | | - } else { |
---|
552 | | - msec = 1; |
---|
553 | | - div = ecmd.base.speed / 1000; |
---|
554 | | - } |
---|
555 | | - } else |
---|
| 546 | + if (err) |
---|
556 | 547 | return DEFAULT_PRB_RETIRE_TOV; |
---|
557 | 548 | |
---|
| 549 | + /* If the link speed is so slow you don't really |
---|
| 550 | + * need to worry about perf anyways |
---|
| 551 | + */ |
---|
| 552 | + if (ecmd.base.speed < SPEED_1000 || |
---|
| 553 | + ecmd.base.speed == SPEED_UNKNOWN) |
---|
| 554 | + return DEFAULT_PRB_RETIRE_TOV; |
---|
| 555 | + |
---|
| 556 | + div = ecmd.base.speed / 1000; |
---|
558 | 557 | mbits = (blk_size_in_bytes * 8) / (1024 * 1024); |
---|
559 | 558 | |
---|
560 | 559 | if (div) |
---|
561 | 560 | mbits /= div; |
---|
562 | 561 | |
---|
563 | | - tmo = mbits * msec; |
---|
564 | | - |
---|
565 | 562 | if (div) |
---|
566 | | - return tmo+1; |
---|
567 | | - return tmo; |
---|
| 563 | + return mbits + 1; |
---|
| 564 | + return mbits; |
---|
568 | 565 | } |
---|
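A worked example of the simplified calculation, assuming a 4 MiB tp_block_size for illustration: mbits = 4 * 2^20 * 8 / 2^20 = 32. On a 1 Gbit/s link, div = 1, so the retire timeout is 32 + 1 = 33 ms, roughly the time needed to fill one block at line rate plus slack; on a 10 Gbit/s link, div = 10 and the integer division gives 32 / 10 + 1 = 4 ms. Links slower than 1 Gbit/s, or of unknown speed, fall back to DEFAULT_PRB_RETIRE_TOV before this point.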
569 | 566 | |
---|
570 | 567 | static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, |
---|
.. | .. |
---|
600 | 597 | req_u->req3.tp_block_size); |
---|
601 | 598 | p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); |
---|
602 | 599 | p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; |
---|
| 600 | + rwlock_init(&p1->blk_fill_in_prog_lock); |
---|
603 | 601 | |
---|
604 | 602 | p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); |
---|
605 | 603 | prb_init_ft_ops(p1, req_u); |
---|
.. | .. |
---|
666 | 664 | * |
---|
667 | 665 | */ |
---|
668 | 666 | if (BLOCK_NUM_PKTS(pbd)) { |
---|
669 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
---|
670 | | - /* Waiting for skb_copy_bits to finish... */ |
---|
671 | | - cpu_relax(); |
---|
672 | | - } |
---|
| 667 | + /* Waiting for skb_copy_bits to finish... */ |
---|
| 668 | + write_lock(&pkc->blk_fill_in_prog_lock); |
---|
| 669 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
---|
673 | 670 | } |
---|
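The write_lock()/write_unlock() pair above replaces the old cpu_relax() poll on an atomic counter: copy paths now hold blk_fill_in_prog_lock for read, so the write acquisition can only succeed once every in-flight copy has finished. An illustrative sketch of the two sides (helper names are made up; the real read side is in prb_fill_curr_block()/prb_clear_blk_fill_status() further down in this diff):

```c
/* Reader side: held around filling a packet into the open block
 * (previously atomic_inc()/atomic_dec() of blk_fill_in_prog). */
static void fill_begin(struct tpacket_kbdq_core *pkc)
{
	read_lock(&pkc->blk_fill_in_prog_lock);
}

static void fill_end(struct tpacket_kbdq_core *pkc)
{
	read_unlock(&pkc->blk_fill_in_prog_lock);
}

/* Writer side: "drain" all fillers before retiring the block
 * (previously a cpu_relax() busy-wait on the counter). */
static void wait_for_fillers(struct tpacket_kbdq_core *pkc)
{
	write_lock(&pkc->blk_fill_in_prog_lock);
	write_unlock(&pkc->blk_fill_in_prog_lock);
}
```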
674 | 671 | |
---|
675 | 672 | if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { |
---|
.. | .. |
---|
767 | 764 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
---|
768 | 765 | struct sock *sk = &po->sk; |
---|
769 | 766 | |
---|
770 | | - if (po->stats.stats3.tp_drops) |
---|
| 767 | + if (atomic_read(&po->tp_drops)) |
---|
771 | 768 | status |= TP_STATUS_LOSING; |
---|
772 | 769 | |
---|
773 | 770 | last_pkt = (struct tpacket3_hdr *)pkc1->prev; |
---|
.. | .. |
---|
783 | 780 | * It shouldn't really happen as we don't close empty |
---|
784 | 781 | * blocks. See prb_retire_rx_blk_timer_expired(). |
---|
785 | 782 | */ |
---|
786 | | - struct timespec ts; |
---|
787 | | - getnstimeofday(&ts); |
---|
| 783 | + struct timespec64 ts; |
---|
| 784 | + ktime_get_real_ts64(&ts); |
---|
788 | 785 | h1->ts_last_pkt.ts_sec = ts.tv_sec; |
---|
789 | 786 | h1->ts_last_pkt.ts_nsec = ts.tv_nsec; |
---|
790 | 787 | } |
---|
.. | .. |
---|
814 | 811 | static void prb_open_block(struct tpacket_kbdq_core *pkc1, |
---|
815 | 812 | struct tpacket_block_desc *pbd1) |
---|
816 | 813 | { |
---|
817 | | - struct timespec ts; |
---|
| 814 | + struct timespec64 ts; |
---|
818 | 815 | struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; |
---|
819 | 816 | |
---|
820 | 817 | smp_rmb(); |
---|
.. | .. |
---|
827 | 824 | BLOCK_NUM_PKTS(pbd1) = 0; |
---|
828 | 825 | BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); |
---|
829 | 826 | |
---|
830 | | - getnstimeofday(&ts); |
---|
| 827 | + ktime_get_real_ts64(&ts); |
---|
831 | 828 | |
---|
832 | 829 | h1->ts_first_pkt.ts_sec = ts.tv_sec; |
---|
833 | 830 | h1->ts_first_pkt.ts_nsec = ts.tv_nsec; |
---|
.. | .. |
---|
928 | 925 | * the timer-handler already handled this case. |
---|
929 | 926 | */ |
---|
930 | 927 | if (!(status & TP_STATUS_BLK_TMO)) { |
---|
931 | | - while (atomic_read(&pkc->blk_fill_in_prog)) { |
---|
932 | | - /* Waiting for skb_copy_bits to finish... */ |
---|
933 | | - cpu_relax(); |
---|
934 | | - } |
---|
| 928 | + /* Waiting for skb_copy_bits to finish... */ |
---|
| 929 | + write_lock(&pkc->blk_fill_in_prog_lock); |
---|
| 930 | + write_unlock(&pkc->blk_fill_in_prog_lock); |
---|
935 | 931 | } |
---|
936 | 932 | prb_close_block(pkc, pbd, po, status); |
---|
937 | 933 | return; |
---|
.. | .. |
---|
952 | 948 | __releases(&pkc->blk_fill_in_prog_lock) |
---|
953 | 949 | { |
---|
954 | 950 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
---|
955 | | - atomic_dec(&pkc->blk_fill_in_prog); |
---|
| 951 | + |
---|
| 952 | + read_unlock(&pkc->blk_fill_in_prog_lock); |
---|
956 | 953 | } |
---|
957 | 954 | |
---|
958 | 955 | static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, |
---|
.. | .. |
---|
1007 | 1004 | pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); |
---|
1008 | 1005 | BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); |
---|
1009 | 1006 | BLOCK_NUM_PKTS(pbd) += 1; |
---|
1010 | | - atomic_inc(&pkc->blk_fill_in_prog); |
---|
| 1007 | + read_lock(&pkc->blk_fill_in_prog_lock); |
---|
1011 | 1008 | prb_run_all_ft_ops(pkc, ppd); |
---|
1012 | 1009 | } |
---|
1013 | 1010 | |
---|
1014 | 1011 | /* Assumes caller has the sk->rx_queue.lock */ |
---|
1015 | 1012 | static void *__packet_lookup_frame_in_block(struct packet_sock *po, |
---|
1016 | 1013 | struct sk_buff *skb, |
---|
1017 | | - int status, |
---|
1018 | 1014 | unsigned int len |
---|
1019 | 1015 | ) |
---|
1020 | 1016 | { |
---|
.. | .. |
---|
1086 | 1082 | po->rx_ring.head, status); |
---|
1087 | 1083 | return curr; |
---|
1088 | 1084 | case TPACKET_V3: |
---|
1089 | | - return __packet_lookup_frame_in_block(po, skb, status, len); |
---|
| 1085 | + return __packet_lookup_frame_in_block(po, skb, len); |
---|
1090 | 1086 | default: |
---|
1091 | 1087 | WARN(1, "TPACKET version not supported\n"); |
---|
1092 | 1088 | BUG(); |
---|
.. | .. |
---|
1094 | 1090 | } |
---|
1095 | 1091 | } |
---|
1096 | 1092 | |
---|
1097 | | -static void *prb_lookup_block(struct packet_sock *po, |
---|
1098 | | - struct packet_ring_buffer *rb, |
---|
1099 | | - unsigned int idx, |
---|
1100 | | - int status) |
---|
| 1093 | +static void *prb_lookup_block(const struct packet_sock *po, |
---|
| 1094 | + const struct packet_ring_buffer *rb, |
---|
| 1095 | + unsigned int idx, |
---|
| 1096 | + int status) |
---|
1101 | 1097 | { |
---|
1102 | 1098 | struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); |
---|
1103 | 1099 | struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); |
---|
.. | .. |
---|
1210 | 1206 | #define ROOM_LOW 0x1 |
---|
1211 | 1207 | #define ROOM_NORMAL 0x2 |
---|
1212 | 1208 | |
---|
1213 | | -static bool __tpacket_has_room(struct packet_sock *po, int pow_off) |
---|
| 1209 | +static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) |
---|
1214 | 1210 | { |
---|
1215 | 1211 | int idx, len; |
---|
1216 | 1212 | |
---|
1217 | | - len = po->rx_ring.frame_max + 1; |
---|
1218 | | - idx = po->rx_ring.head; |
---|
| 1213 | + len = READ_ONCE(po->rx_ring.frame_max) + 1; |
---|
| 1214 | + idx = READ_ONCE(po->rx_ring.head); |
---|
1219 | 1215 | if (pow_off) |
---|
1220 | 1216 | idx += len >> pow_off; |
---|
1221 | 1217 | if (idx >= len) |
---|
.. | .. |
---|
1223 | 1219 | return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
---|
1224 | 1220 | } |
---|
1225 | 1221 | |
---|
1226 | | -static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off) |
---|
| 1222 | +static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) |
---|
1227 | 1223 | { |
---|
1228 | 1224 | int idx, len; |
---|
1229 | 1225 | |
---|
1230 | | - len = po->rx_ring.prb_bdqc.knum_blocks; |
---|
1231 | | - idx = po->rx_ring.prb_bdqc.kactive_blk_num; |
---|
| 1226 | + len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); |
---|
| 1227 | + idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); |
---|
1232 | 1228 | if (pow_off) |
---|
1233 | 1229 | idx += len >> pow_off; |
---|
1234 | 1230 | if (idx >= len) |
---|
.. | .. |
---|
1236 | 1232 | return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); |
---|
1237 | 1233 | } |
---|
1238 | 1234 | |
---|
1239 | | -static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
---|
| 1235 | +static int __packet_rcv_has_room(const struct packet_sock *po, |
---|
| 1236 | + const struct sk_buff *skb) |
---|
1240 | 1237 | { |
---|
1241 | | - struct sock *sk = &po->sk; |
---|
| 1238 | + const struct sock *sk = &po->sk; |
---|
1242 | 1239 | int ret = ROOM_NONE; |
---|
1243 | 1240 | |
---|
1244 | 1241 | if (po->prot_hook.func != tpacket_rcv) { |
---|
1245 | | - int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
---|
1246 | | - - (skb ? skb->truesize : 0); |
---|
1247 | | - if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF)) |
---|
| 1242 | + int rcvbuf = READ_ONCE(sk->sk_rcvbuf); |
---|
| 1243 | + int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) |
---|
| 1244 | + - (skb ? skb->truesize : 0); |
---|
| 1245 | + |
---|
| 1246 | + if (avail > (rcvbuf >> ROOM_POW_OFF)) |
---|
1248 | 1247 | return ROOM_NORMAL; |
---|
1249 | 1248 | else if (avail > 0) |
---|
1250 | 1249 | return ROOM_LOW; |
---|
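As a concrete illustration: with a typical 212992-byte sk_rcvbuf and assuming ROOM_POW_OFF is 2 (it is defined alongside the ROOM_* values above, outside this hunk), ROOM_NORMAL needs more than 212992 >> 2 = 53248 bytes of slack after subtracting sk_rmem_alloc and the candidate skb's truesize; any smaller positive slack reports ROOM_LOW, and no slack at all is ROOM_NONE.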
.. | .. |
---|
1269 | 1268 | |
---|
1270 | 1269 | static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) |
---|
1271 | 1270 | { |
---|
1272 | | - int ret; |
---|
1273 | | - bool has_room; |
---|
| 1271 | + int pressure, ret; |
---|
1274 | 1272 | |
---|
1275 | | - spin_lock_bh(&po->sk.sk_receive_queue.lock); |
---|
1276 | 1273 | ret = __packet_rcv_has_room(po, skb); |
---|
1277 | | - has_room = ret == ROOM_NORMAL; |
---|
1278 | | - if (po->pressure == has_room) |
---|
1279 | | - po->pressure = !has_room; |
---|
1280 | | - spin_unlock_bh(&po->sk.sk_receive_queue.lock); |
---|
| 1274 | + pressure = ret != ROOM_NORMAL; |
---|
| 1275 | + |
---|
| 1276 | + if (READ_ONCE(po->pressure) != pressure) |
---|
| 1277 | + WRITE_ONCE(po->pressure, pressure); |
---|
1281 | 1278 | |
---|
1282 | 1279 | return ret; |
---|
| 1280 | +} |
---|
| 1281 | + |
---|
| 1282 | +static void packet_rcv_try_clear_pressure(struct packet_sock *po) |
---|
| 1283 | +{ |
---|
| 1284 | + if (READ_ONCE(po->pressure) && |
---|
| 1285 | + __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
---|
| 1286 | + WRITE_ONCE(po->pressure, 0); |
---|
1283 | 1287 | } |
---|
1284 | 1288 | |
---|
1285 | 1289 | static void packet_sock_destruct(struct sock *sk) |
---|
.. | .. |
---|
1355 | 1359 | struct packet_sock *po, *po_next, *po_skip = NULL; |
---|
1356 | 1360 | unsigned int i, j, room = ROOM_NONE; |
---|
1357 | 1361 | |
---|
1358 | | - po = pkt_sk(f->arr[idx]); |
---|
| 1362 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
---|
1359 | 1363 | |
---|
1360 | 1364 | if (try_self) { |
---|
1361 | 1365 | room = packet_rcv_has_room(po, skb); |
---|
.. | .. |
---|
1367 | 1371 | |
---|
1368 | 1372 | i = j = min_t(int, po->rollover->sock, num - 1); |
---|
1369 | 1373 | do { |
---|
1370 | | - po_next = pkt_sk(f->arr[i]); |
---|
1371 | | - if (po_next != po_skip && !po_next->pressure && |
---|
| 1374 | + po_next = pkt_sk(rcu_dereference(f->arr[i])); |
---|
| 1375 | + if (po_next != po_skip && !READ_ONCE(po_next->pressure) && |
---|
1372 | 1376 | packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { |
---|
1373 | 1377 | if (i != j) |
---|
1374 | 1378 | po->rollover->sock = i; |
---|
.. | .. |
---|
1462 | 1466 | if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) |
---|
1463 | 1467 | idx = fanout_demux_rollover(f, skb, idx, true, num); |
---|
1464 | 1468 | |
---|
1465 | | - po = pkt_sk(f->arr[idx]); |
---|
| 1469 | + po = pkt_sk(rcu_dereference(f->arr[idx])); |
---|
1466 | 1470 | return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); |
---|
1467 | 1471 | } |
---|
1468 | 1472 | |
---|
.. | .. |
---|
1476 | 1480 | struct packet_fanout *f = po->fanout; |
---|
1477 | 1481 | |
---|
1478 | 1482 | spin_lock(&f->lock); |
---|
1479 | | - f->arr[f->num_members] = sk; |
---|
| 1483 | + rcu_assign_pointer(f->arr[f->num_members], sk); |
---|
1480 | 1484 | smp_wmb(); |
---|
1481 | 1485 | f->num_members++; |
---|
1482 | 1486 | if (f->num_members == 1) |
---|
.. | .. |
---|
1491 | 1495 | |
---|
1492 | 1496 | spin_lock(&f->lock); |
---|
1493 | 1497 | for (i = 0; i < f->num_members; i++) { |
---|
1494 | | - if (f->arr[i] == sk) |
---|
| 1498 | + if (rcu_dereference_protected(f->arr[i], |
---|
| 1499 | + lockdep_is_held(&f->lock)) == sk) |
---|
1495 | 1500 | break; |
---|
1496 | 1501 | } |
---|
1497 | 1502 | BUG_ON(i >= f->num_members); |
---|
1498 | | - f->arr[i] = f->arr[f->num_members - 1]; |
---|
| 1503 | + rcu_assign_pointer(f->arr[i], |
---|
| 1504 | + rcu_dereference_protected(f->arr[f->num_members - 1], |
---|
| 1505 | + lockdep_is_held(&f->lock))); |
---|
1499 | 1506 | f->num_members--; |
---|
1500 | 1507 | if (f->num_members == 0) |
---|
1501 | 1508 | __dev_remove_pack(&f->prot_hook); |
---|
.. | .. |
---|
1538 | 1545 | } |
---|
1539 | 1546 | } |
---|
1540 | 1547 | |
---|
1541 | | -static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, |
---|
| 1548 | +static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, |
---|
1542 | 1549 | unsigned int len) |
---|
1543 | 1550 | { |
---|
1544 | 1551 | struct bpf_prog *new; |
---|
.. | .. |
---|
1547 | 1554 | |
---|
1548 | 1555 | if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) |
---|
1549 | 1556 | return -EPERM; |
---|
1550 | | - if (len != sizeof(fprog)) |
---|
1551 | | - return -EINVAL; |
---|
1552 | | - if (copy_from_user(&fprog, data, len)) |
---|
1553 | | - return -EFAULT; |
---|
| 1557 | + |
---|
| 1558 | + ret = copy_bpf_fprog_from_user(&fprog, data, len); |
---|
| 1559 | + if (ret) |
---|
| 1560 | + return ret; |
---|
1554 | 1561 | |
---|
1555 | 1562 | ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); |
---|
1556 | 1563 | if (ret) |
---|
.. | .. |
---|
1560 | 1567 | return 0; |
---|
1561 | 1568 | } |
---|
1562 | 1569 | |
---|
1563 | | -static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, |
---|
| 1570 | +static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, |
---|
1564 | 1571 | unsigned int len) |
---|
1565 | 1572 | { |
---|
1566 | 1573 | struct bpf_prog *new; |
---|
.. | .. |
---|
1570 | 1577 | return -EPERM; |
---|
1571 | 1578 | if (len != sizeof(fd)) |
---|
1572 | 1579 | return -EINVAL; |
---|
1573 | | - if (copy_from_user(&fd, data, len)) |
---|
| 1580 | + if (copy_from_sockptr(&fd, data, len)) |
---|
1574 | 1581 | return -EFAULT; |
---|
1575 | 1582 | |
---|
1576 | 1583 | new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); |
---|
.. | .. |
---|
1581 | 1588 | return 0; |
---|
1582 | 1589 | } |
---|
1583 | 1590 | |
---|
1584 | | -static int fanout_set_data(struct packet_sock *po, char __user *data, |
---|
| 1591 | +static int fanout_set_data(struct packet_sock *po, sockptr_t data, |
---|
1585 | 1592 | unsigned int len) |
---|
1586 | 1593 | { |
---|
1587 | 1594 | switch (po->fanout->type) { |
---|
.. | .. |
---|
1633 | 1640 | return false; |
---|
1634 | 1641 | } |
---|
1635 | 1642 | |
---|
1636 | | -static int fanout_add(struct sock *sk, u16 id, u16 type_flags) |
---|
| 1643 | +static int fanout_add(struct sock *sk, struct fanout_args *args) |
---|
1637 | 1644 | { |
---|
1638 | 1645 | struct packet_rollover *rollover = NULL; |
---|
1639 | 1646 | struct packet_sock *po = pkt_sk(sk); |
---|
| 1647 | + u16 type_flags = args->type_flags; |
---|
1640 | 1648 | struct packet_fanout *f, *match; |
---|
1641 | 1649 | u8 type = type_flags & 0xff; |
---|
1642 | 1650 | u8 flags = type_flags >> 8; |
---|
| 1651 | + u16 id = args->id; |
---|
1643 | 1652 | int err; |
---|
1644 | 1653 | |
---|
1645 | 1654 | switch (type) { |
---|
.. | .. |
---|
1697 | 1706 | } |
---|
1698 | 1707 | } |
---|
1699 | 1708 | err = -EINVAL; |
---|
1700 | | - if (match && match->flags != flags) |
---|
1701 | | - goto out; |
---|
1702 | | - if (!match) { |
---|
| 1709 | + if (match) { |
---|
| 1710 | + if (match->flags != flags) |
---|
| 1711 | + goto out; |
---|
| 1712 | + if (args->max_num_members && |
---|
| 1713 | + args->max_num_members != match->max_num_members) |
---|
| 1714 | + goto out; |
---|
| 1715 | + } else { |
---|
| 1716 | + if (args->max_num_members > PACKET_FANOUT_MAX) |
---|
| 1717 | + goto out; |
---|
| 1718 | + if (!args->max_num_members) |
---|
| 1719 | + /* legacy PACKET_FANOUT_MAX */ |
---|
| 1720 | + args->max_num_members = 256; |
---|
1703 | 1721 | err = -ENOMEM; |
---|
1704 | | - match = kzalloc(sizeof(*match), GFP_KERNEL); |
---|
| 1722 | + match = kvzalloc(struct_size(match, arr, args->max_num_members), |
---|
| 1723 | + GFP_KERNEL); |
---|
1705 | 1724 | if (!match) |
---|
1706 | 1725 | goto out; |
---|
1707 | 1726 | write_pnet(&match->net, sock_net(sk)); |
---|
.. | .. |
---|
1718 | 1737 | match->prot_hook.af_packet_priv = match; |
---|
1719 | 1738 | match->prot_hook.af_packet_net = read_pnet(&match->net); |
---|
1720 | 1739 | match->prot_hook.id_match = match_fanout_group; |
---|
| 1740 | + match->max_num_members = args->max_num_members; |
---|
1721 | 1741 | list_add(&match->list, &fanout_list); |
---|
1722 | 1742 | } |
---|
1723 | 1743 | err = -EINVAL; |
---|
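A minimal userspace sketch of the extended interface handled above (assumptions: the uapi struct fanout_args exposes id, type_flags and max_num_members as used by fanout_add(), and fd is a bound AF_PACKET socket). Passing only a 4-byte int keeps the legacy 256-member limit:

```c
#include <sys/socket.h>
#include <linux/if_packet.h>

static int join_large_fanout(int fd)
{
	struct fanout_args args = {
		.id              = 42,                 /* arbitrary group id */
		.type_flags      = PACKET_FANOUT_HASH, /* mode in the low byte */
		.max_num_members = 1024,               /* above the legacy 256 */
	};

	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args));
}
```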
.. | .. |
---|
1728 | 1748 | match->prot_hook.type == po->prot_hook.type && |
---|
1729 | 1749 | match->prot_hook.dev == po->prot_hook.dev) { |
---|
1730 | 1750 | err = -ENOSPC; |
---|
1731 | | - if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { |
---|
| 1751 | + if (refcount_read(&match->sk_ref) < match->max_num_members) { |
---|
1732 | 1752 | __dev_remove_pack(&po->prot_hook); |
---|
1733 | 1753 | |
---|
1734 | 1754 | /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ |
---|
.. | .. |
---|
1745 | 1765 | |
---|
1746 | 1766 | if (err && !refcount_read(&match->sk_ref)) { |
---|
1747 | 1767 | list_del(&match->list); |
---|
1748 | | - kfree(match); |
---|
| 1768 | + kvfree(match); |
---|
1749 | 1769 | } |
---|
1750 | 1770 | |
---|
1751 | 1771 | out: |
---|
.. | .. |
---|
1835 | 1855 | skb_dst_drop(skb); |
---|
1836 | 1856 | |
---|
1837 | 1857 | /* drop conntrack reference */ |
---|
1838 | | - nf_reset(skb); |
---|
| 1858 | + nf_reset_ct(skb); |
---|
1839 | 1859 | |
---|
1840 | 1860 | spkt = &PACKET_SKB_CB(skb)->sa.pkt; |
---|
1841 | 1861 | |
---|
.. | .. |
---|
1863 | 1883 | return 0; |
---|
1864 | 1884 | } |
---|
1865 | 1885 | |
---|
| 1886 | +static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) |
---|
| 1887 | +{ |
---|
| 1888 | + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && |
---|
| 1889 | + sock->type == SOCK_RAW) { |
---|
| 1890 | + skb_reset_mac_header(skb); |
---|
| 1891 | + skb->protocol = dev_parse_header_protocol(skb); |
---|
| 1892 | + } |
---|
| 1893 | + |
---|
| 1894 | + skb_probe_transport_header(skb); |
---|
| 1895 | +} |
---|
1866 | 1896 | |
---|
1867 | 1897 | /* |
---|
1868 | 1898 | * Output a raw packet to a device layer. This bypasses all the other |
---|
.. | .. |
---|
1978 | 2008 | skb->mark = sk->sk_mark; |
---|
1979 | 2009 | skb->tstamp = sockc.transmit_time; |
---|
1980 | 2010 | |
---|
1981 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 2011 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
---|
1982 | 2012 | |
---|
1983 | 2013 | if (unlikely(extra_len == 4)) |
---|
1984 | 2014 | skb->no_fcs = 1; |
---|
1985 | 2015 | |
---|
1986 | | - skb_probe_transport_header(skb, 0); |
---|
| 2016 | + packet_parse_headers(skb, sock); |
---|
1987 | 2017 | |
---|
1988 | 2018 | dev_queue_xmit(skb); |
---|
1989 | 2019 | rcu_read_unlock(); |
---|
.. | .. |
---|
2060 | 2090 | |
---|
2061 | 2091 | skb->dev = dev; |
---|
2062 | 2092 | |
---|
2063 | | - if (dev->header_ops) { |
---|
| 2093 | + if (dev_has_header(dev)) { |
---|
2064 | 2094 | /* The device has an explicit notion of ll header, |
---|
2065 | 2095 | * exported to higher levels. |
---|
2066 | 2096 | * |
---|
.. | .. |
---|
2125 | 2155 | skb_dst_drop(skb); |
---|
2126 | 2156 | |
---|
2127 | 2157 | /* drop conntrack reference */ |
---|
2128 | | - nf_reset(skb); |
---|
| 2158 | + nf_reset_ct(skb); |
---|
2129 | 2159 | |
---|
2130 | 2160 | spin_lock(&sk->sk_receive_queue.lock); |
---|
2131 | 2161 | po->stats.stats1.tp_packets++; |
---|
.. | .. |
---|
2137 | 2167 | |
---|
2138 | 2168 | drop_n_acct: |
---|
2139 | 2169 | is_drop_n_account = true; |
---|
2140 | | - spin_lock(&sk->sk_receive_queue.lock); |
---|
2141 | | - po->stats.stats1.tp_drops++; |
---|
| 2170 | + atomic_inc(&po->tp_drops); |
---|
2142 | 2171 | atomic_inc(&sk->sk_drops); |
---|
2143 | | - spin_unlock(&sk->sk_receive_queue.lock); |
---|
2144 | 2172 | |
---|
2145 | 2173 | drop_n_restore: |
---|
2146 | 2174 | if (skb_head != skb->data && skb_shared(skb)) { |
---|
.. | .. |
---|
2169 | 2197 | unsigned short macoff, hdrlen; |
---|
2170 | 2198 | unsigned int netoff; |
---|
2171 | 2199 | struct sk_buff *copy_skb = NULL; |
---|
2172 | | - struct timespec ts; |
---|
| 2200 | + struct timespec64 ts; |
---|
2173 | 2201 | __u32 ts_status; |
---|
2174 | 2202 | bool is_drop_n_account = false; |
---|
2175 | 2203 | unsigned int slot_id = 0; |
---|
.. | .. |
---|
2191 | 2219 | if (!net_eq(dev_net(dev), sock_net(sk))) |
---|
2192 | 2220 | goto drop; |
---|
2193 | 2221 | |
---|
2194 | | - if (dev->header_ops) { |
---|
| 2222 | + if (dev_has_header(dev)) { |
---|
2195 | 2223 | if (sk->sk_type != SOCK_DGRAM) |
---|
2196 | 2224 | skb_push(skb, skb->data - skb_mac_header(skb)); |
---|
2197 | 2225 | else if (skb->pkt_type == PACKET_OUTGOING) { |
---|
.. | .. |
---|
2206 | 2234 | if (!res) |
---|
2207 | 2235 | goto drop_n_restore; |
---|
2208 | 2236 | |
---|
| 2237 | + /* If we are flooded, just give up */ |
---|
| 2238 | + if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { |
---|
| 2239 | + atomic_inc(&po->tp_drops); |
---|
| 2240 | + goto drop_n_restore; |
---|
| 2241 | + } |
---|
| 2242 | + |
---|
2209 | 2243 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
---|
2210 | 2244 | status |= TP_STATUS_CSUMNOTREADY; |
---|
2211 | 2245 | else if (skb->pkt_type != PACKET_OUTGOING && |
---|
2212 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
---|
2213 | | - skb_csum_unnecessary(skb))) |
---|
| 2246 | + skb_csum_unnecessary(skb)) |
---|
2214 | 2247 | status |= TP_STATUS_CSUM_VALID; |
---|
2215 | 2248 | |
---|
2216 | 2249 | if (snaplen > res) |
---|
.. | .. |
---|
2231 | 2264 | macoff = netoff - maclen; |
---|
2232 | 2265 | } |
---|
2233 | 2266 | if (netoff > USHRT_MAX) { |
---|
2234 | | - spin_lock(&sk->sk_receive_queue.lock); |
---|
2235 | | - po->stats.stats1.tp_drops++; |
---|
2236 | | - spin_unlock(&sk->sk_receive_queue.lock); |
---|
| 2267 | + atomic_inc(&po->tp_drops); |
---|
2237 | 2268 | goto drop_n_restore; |
---|
2238 | 2269 | } |
---|
2239 | 2270 | if (po->tp_version <= TPACKET_V2) { |
---|
.. | .. |
---|
2246 | 2277 | copy_skb = skb_get(skb); |
---|
2247 | 2278 | skb_head = skb->data; |
---|
2248 | 2279 | } |
---|
2249 | | - if (copy_skb) |
---|
| 2280 | + if (copy_skb) { |
---|
| 2281 | + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, |
---|
| 2282 | + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); |
---|
2250 | 2283 | skb_set_owner_r(copy_skb, sk); |
---|
| 2284 | + } |
---|
2251 | 2285 | } |
---|
2252 | 2286 | snaplen = po->rx_ring.frame_size - macoff; |
---|
2253 | 2287 | if ((int)snaplen < 0) { |
---|
.. | .. |
---|
2299 | 2333 | * Anyways, moving it for V1/V2 only as V3 doesn't need this |
---|
2300 | 2334 | * at packet level. |
---|
2301 | 2335 | */ |
---|
2302 | | - if (po->stats.stats1.tp_drops) |
---|
| 2336 | + if (atomic_read(&po->tp_drops)) |
---|
2303 | 2337 | status |= TP_STATUS_LOSING; |
---|
2304 | 2338 | } |
---|
2305 | 2339 | |
---|
.. | .. |
---|
2312 | 2346 | |
---|
2313 | 2347 | skb_copy_bits(skb, 0, h.raw + macoff, snaplen); |
---|
2314 | 2348 | |
---|
2315 | | - if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) |
---|
2316 | | - getnstimeofday(&ts); |
---|
| 2349 | + /* Always timestamp; prefer an existing software timestamp taken |
---|
| 2350 | + * closer to the time of capture. |
---|
| 2351 | + */ |
---|
| 2352 | + ts_status = tpacket_get_timestamp(skb, &ts, |
---|
| 2353 | + po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE); |
---|
| 2354 | + if (!ts_status) |
---|
| 2355 | + ktime_get_real_ts64(&ts); |
---|
2317 | 2356 | |
---|
2318 | 2357 | status |= ts_status; |
---|
2319 | 2358 | |
---|
.. | .. |
---|
2412 | 2451 | return 0; |
---|
2413 | 2452 | |
---|
2414 | 2453 | drop_n_account: |
---|
2415 | | - is_drop_n_account = true; |
---|
2416 | | - po->stats.stats1.tp_drops++; |
---|
2417 | 2454 | spin_unlock(&sk->sk_receive_queue.lock); |
---|
| 2455 | + atomic_inc(&po->tp_drops); |
---|
| 2456 | + is_drop_n_account = true; |
---|
2418 | 2457 | |
---|
2419 | 2458 | sk->sk_data_ready(sk); |
---|
2420 | 2459 | kfree_skb(copy_skb); |
---|
.. | .. |
---|
2440 | 2479 | } |
---|
2441 | 2480 | |
---|
2442 | 2481 | sock_wfree(skb); |
---|
2443 | | -} |
---|
2444 | | - |
---|
2445 | | -static void tpacket_set_protocol(const struct net_device *dev, |
---|
2446 | | - struct sk_buff *skb) |
---|
2447 | | -{ |
---|
2448 | | - if (dev->type == ARPHRD_ETHER) { |
---|
2449 | | - skb_reset_mac_header(skb); |
---|
2450 | | - skb->protocol = eth_hdr(skb)->h_proto; |
---|
2451 | | - } |
---|
2452 | 2482 | } |
---|
2453 | 2483 | |
---|
2454 | 2484 | static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) |
---|
.. | .. |
---|
2498 | 2528 | skb->priority = po->sk.sk_priority; |
---|
2499 | 2529 | skb->mark = po->sk.sk_mark; |
---|
2500 | 2530 | skb->tstamp = sockc->transmit_time; |
---|
2501 | | - sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 2531 | + skb_setup_tx_timestamp(skb, sockc->tsflags); |
---|
2502 | 2532 | skb_zcopy_set_nouarg(skb, ph.raw); |
---|
2503 | 2533 | |
---|
2504 | 2534 | skb_reserve(skb, hlen); |
---|
.. | .. |
---|
2521 | 2551 | return err; |
---|
2522 | 2552 | if (!dev_validate_header(dev, skb->data, hdrlen)) |
---|
2523 | 2553 | return -EINVAL; |
---|
2524 | | - if (!skb->protocol) |
---|
2525 | | - tpacket_set_protocol(dev, skb); |
---|
2526 | 2554 | |
---|
2527 | 2555 | data += hdrlen; |
---|
2528 | 2556 | to_write -= hdrlen; |
---|
.. | .. |
---|
2557 | 2585 | len = ((to_write > len_max) ? len_max : to_write); |
---|
2558 | 2586 | } |
---|
2559 | 2587 | |
---|
2560 | | - skb_probe_transport_header(skb, 0); |
---|
| 2588 | + packet_parse_headers(skb, sock); |
---|
2561 | 2589 | |
---|
2562 | 2590 | return tp_len; |
---|
2563 | 2591 | } |
---|
.. | .. |
---|
2788 | 2816 | |
---|
2789 | 2817 | status = TP_STATUS_SEND_REQUEST; |
---|
2790 | 2818 | err = po->xmit(skb); |
---|
2791 | | - if (unlikely(err > 0)) { |
---|
2792 | | - err = net_xmit_errno(err); |
---|
| 2819 | + if (unlikely(err != 0)) { |
---|
| 2820 | + if (err > 0) |
---|
| 2821 | + err = net_xmit_errno(err); |
---|
2793 | 2822 | if (err && __packet_get_status(po, ph) == |
---|
2794 | 2823 | TP_STATUS_AVAILABLE) { |
---|
2795 | 2824 | /* skb was destructed already */ |
---|
.. | .. |
---|
2956 | 2985 | if (err) |
---|
2957 | 2986 | goto out_free; |
---|
2958 | 2987 | |
---|
2959 | | - if (sock->type == SOCK_RAW && |
---|
2960 | | - !dev_validate_header(dev, skb->data, len)) { |
---|
| 2988 | + if ((sock->type == SOCK_RAW && |
---|
| 2989 | + !dev_validate_header(dev, skb->data, len)) || !skb->len) { |
---|
2961 | 2990 | err = -EINVAL; |
---|
2962 | 2991 | goto out_free; |
---|
2963 | 2992 | } |
---|
2964 | 2993 | |
---|
2965 | | - sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); |
---|
| 2994 | + skb_setup_tx_timestamp(skb, sockc.tsflags); |
---|
2966 | 2995 | |
---|
2967 | 2996 | if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && |
---|
2968 | 2997 | !packet_extra_vlan_len_allowed(dev, skb)) { |
---|
.. | .. |
---|
2984 | 3013 | virtio_net_hdr_set_proto(skb, &vnet_hdr); |
---|
2985 | 3014 | } |
---|
2986 | 3015 | |
---|
2987 | | - skb_probe_transport_header(skb, reserve); |
---|
| 3016 | + packet_parse_headers(skb, sock); |
---|
2988 | 3017 | |
---|
2989 | 3018 | if (unlikely(extra_len == 4)) |
---|
2990 | 3019 | skb->no_fcs = 1; |
---|
2991 | 3020 | |
---|
2992 | 3021 | err = po->xmit(skb); |
---|
2993 | | - if (err > 0 && (err = net_xmit_errno(err)) != 0) |
---|
2994 | | - goto out_unlock; |
---|
| 3022 | + if (unlikely(err != 0)) { |
---|
| 3023 | + if (err > 0) |
---|
| 3024 | + err = net_xmit_errno(err); |
---|
| 3025 | + if (err) |
---|
| 3026 | + goto out_unlock; |
---|
| 3027 | + } |
---|
2995 | 3028 | |
---|
2996 | 3029 | dev_put(dev); |
---|
2997 | 3030 | |
---|
.. | .. |
---|
3011 | 3044 | struct sock *sk = sock->sk; |
---|
3012 | 3045 | struct packet_sock *po = pkt_sk(sk); |
---|
3013 | 3046 | |
---|
3014 | | - if (po->tx_ring.pg_vec) |
---|
| 3047 | + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. |
---|
| 3048 | + * tpacket_snd() will redo the check safely. |
---|
| 3049 | + */ |
---|
| 3050 | + if (data_race(po->tx_ring.pg_vec)) |
---|
3015 | 3051 | return tpacket_snd(po, msg); |
---|
3016 | | - else |
---|
3017 | | - return packet_snd(sock, msg, len); |
---|
| 3052 | + |
---|
| 3053 | + return packet_snd(sock, msg, len); |
---|
3018 | 3054 | } |
---|
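data_race() (from <linux/compiler.h>) only annotates the lockless peek for KCSAN; correctness does not depend on the racy value because, as the comment says, tpacket_snd() re-checks it under the lock. Roughly, as a sketch of code outside this hunk:

```c
	mutex_lock(&po->pg_vec_lock);
	if (unlikely(!po->tx_ring.pg_vec)) {
		err = -EBUSY;	/* ring vanished after the unlocked peek */
		goto out;
	}
```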
3019 | 3055 | |
---|
3020 | 3056 | /* |
---|
.. | .. |
---|
3075 | 3111 | kfree(po->rollover); |
---|
3076 | 3112 | if (f) { |
---|
3077 | 3113 | fanout_release_data(f); |
---|
3078 | | - kfree(f); |
---|
| 3114 | + kvfree(f); |
---|
3079 | 3115 | } |
---|
3080 | 3116 | /* |
---|
3081 | 3117 | * Now the socket is dead. No more input will appear. |
---|
.. | .. |
---|
3370 | 3406 | if (skb == NULL) |
---|
3371 | 3407 | goto out; |
---|
3372 | 3408 | |
---|
3373 | | - if (pkt_sk(sk)->pressure) |
---|
3374 | | - packet_rcv_has_room(pkt_sk(sk), NULL); |
---|
| 3409 | + packet_rcv_try_clear_pressure(pkt_sk(sk)); |
---|
3375 | 3410 | |
---|
3376 | 3411 | if (pkt_sk(sk)->has_vnet_hdr) { |
---|
3377 | 3412 | err = packet_rcv_vnet(msg, skb, &len); |
---|
.. | .. |
---|
3406 | 3441 | sock_recv_ts_and_drops(msg, sk, skb); |
---|
3407 | 3442 | |
---|
3408 | 3443 | if (msg->msg_name) { |
---|
| 3444 | + const size_t max_len = min(sizeof(skb->cb), |
---|
| 3445 | + sizeof(struct sockaddr_storage)); |
---|
3409 | 3446 | int copy_len; |
---|
3410 | 3447 | |
---|
3411 | 3448 | /* If the address length field is there to be filled |
---|
.. | .. |
---|
3428 | 3465 | msg->msg_namelen = sizeof(struct sockaddr_ll); |
---|
3429 | 3466 | } |
---|
3430 | 3467 | } |
---|
| 3468 | + if (WARN_ON_ONCE(copy_len > max_len)) { |
---|
| 3469 | + copy_len = max_len; |
---|
| 3470 | + msg->msg_namelen = copy_len; |
---|
| 3471 | + } |
---|
3431 | 3472 | memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); |
---|
3432 | 3473 | } |
---|
3433 | 3474 | |
---|
.. | .. |
---|
3438 | 3479 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
---|
3439 | 3480 | aux.tp_status |= TP_STATUS_CSUMNOTREADY; |
---|
3440 | 3481 | else if (skb->pkt_type != PACKET_OUTGOING && |
---|
3441 | | - (skb->ip_summed == CHECKSUM_COMPLETE || |
---|
3442 | | - skb_csum_unnecessary(skb))) |
---|
| 3482 | + skb_csum_unnecessary(skb)) |
---|
3443 | 3483 | aux.tp_status |= TP_STATUS_CSUM_VALID; |
---|
3444 | 3484 | |
---|
3445 | 3485 | aux.tp_len = origlen; |
---|
.. | .. |
---|
3669 | 3709 | } |
---|
3670 | 3710 | |
---|
3671 | 3711 | static int |
---|
3672 | | -packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) |
---|
| 3712 | +packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, |
---|
| 3713 | + unsigned int optlen) |
---|
3673 | 3714 | { |
---|
3674 | 3715 | struct sock *sk = sock->sk; |
---|
3675 | 3716 | struct packet_sock *po = pkt_sk(sk); |
---|
.. | .. |
---|
3689 | 3730 | return -EINVAL; |
---|
3690 | 3731 | if (len > sizeof(mreq)) |
---|
3691 | 3732 | len = sizeof(mreq); |
---|
3692 | | - if (copy_from_user(&mreq, optval, len)) |
---|
| 3733 | + if (copy_from_sockptr(&mreq, optval, len)) |
---|
3693 | 3734 | return -EFAULT; |
---|
3694 | 3735 | if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) |
---|
3695 | 3736 | return -EINVAL; |
---|
.. | .. |
---|
3720 | 3761 | if (optlen < len) { |
---|
3721 | 3762 | ret = -EINVAL; |
---|
3722 | 3763 | } else { |
---|
3723 | | - if (copy_from_user(&req_u.req, optval, len)) |
---|
| 3764 | + if (copy_from_sockptr(&req_u.req, optval, len)) |
---|
3724 | 3765 | ret = -EFAULT; |
---|
3725 | 3766 | else |
---|
3726 | 3767 | ret = packet_set_ring(sk, &req_u, 0, |
---|
.. | .. |
---|
3735 | 3776 | |
---|
3736 | 3777 | if (optlen != sizeof(val)) |
---|
3737 | 3778 | return -EINVAL; |
---|
3738 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3779 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3739 | 3780 | return -EFAULT; |
---|
3740 | 3781 | |
---|
3741 | 3782 | pkt_sk(sk)->copy_thresh = val; |
---|
.. | .. |
---|
3747 | 3788 | |
---|
3748 | 3789 | if (optlen != sizeof(val)) |
---|
3749 | 3790 | return -EINVAL; |
---|
3750 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3791 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3751 | 3792 | return -EFAULT; |
---|
3752 | 3793 | switch (val) { |
---|
3753 | 3794 | case TPACKET_V1: |
---|
.. | .. |
---|
3773 | 3814 | |
---|
3774 | 3815 | if (optlen != sizeof(val)) |
---|
3775 | 3816 | return -EINVAL; |
---|
3776 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3817 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3777 | 3818 | return -EFAULT; |
---|
3778 | 3819 | if (val > INT_MAX) |
---|
3779 | 3820 | return -EINVAL; |
---|
.. | .. |
---|
3793 | 3834 | |
---|
3794 | 3835 | if (optlen != sizeof(val)) |
---|
3795 | 3836 | return -EINVAL; |
---|
3796 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3837 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3797 | 3838 | return -EFAULT; |
---|
3798 | 3839 | |
---|
3799 | 3840 | lock_sock(sk); |
---|
.. | .. |
---|
3812 | 3853 | |
---|
3813 | 3854 | if (optlen < sizeof(val)) |
---|
3814 | 3855 | return -EINVAL; |
---|
3815 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3856 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3816 | 3857 | return -EFAULT; |
---|
3817 | 3858 | |
---|
3818 | 3859 | lock_sock(sk); |
---|
.. | .. |
---|
3826 | 3867 | |
---|
3827 | 3868 | if (optlen < sizeof(val)) |
---|
3828 | 3869 | return -EINVAL; |
---|
3829 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3870 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3830 | 3871 | return -EFAULT; |
---|
3831 | 3872 | |
---|
3832 | 3873 | lock_sock(sk); |
---|
.. | .. |
---|
3842 | 3883 | return -EINVAL; |
---|
3843 | 3884 | if (optlen < sizeof(val)) |
---|
3844 | 3885 | return -EINVAL; |
---|
3845 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3886 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3846 | 3887 | return -EFAULT; |
---|
3847 | 3888 | |
---|
3848 | 3889 | lock_sock(sk); |
---|
.. | .. |
---|
3861 | 3902 | |
---|
3862 | 3903 | if (optlen != sizeof(val)) |
---|
3863 | 3904 | return -EINVAL; |
---|
3864 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3905 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3865 | 3906 | return -EFAULT; |
---|
3866 | 3907 | |
---|
3867 | 3908 | po->tp_tstamp = val; |
---|
.. | .. |
---|
3869 | 3910 | } |
---|
3870 | 3911 | case PACKET_FANOUT: |
---|
3871 | 3912 | { |
---|
3872 | | - int val; |
---|
| 3913 | + struct fanout_args args = { 0 }; |
---|
3873 | 3914 | |
---|
3874 | | - if (optlen != sizeof(val)) |
---|
| 3915 | + if (optlen != sizeof(int) && optlen != sizeof(args)) |
---|
3875 | 3916 | return -EINVAL; |
---|
3876 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3917 | + if (copy_from_sockptr(&args, optval, optlen)) |
---|
3877 | 3918 | return -EFAULT; |
---|
3878 | 3919 | |
---|
3879 | | - return fanout_add(sk, val & 0xffff, val >> 16); |
---|
| 3920 | + return fanout_add(sk, &args); |
---|
3880 | 3921 | } |
---|
3881 | 3922 | case PACKET_FANOUT_DATA: |
---|
3882 | 3923 | { |
---|
.. | .. |
---|
3886 | 3927 | |
---|
3887 | 3928 | return fanout_set_data(po, optval, optlen); |
---|
3888 | 3929 | } |
---|
| 3930 | + case PACKET_IGNORE_OUTGOING: |
---|
| 3931 | + { |
---|
| 3932 | + int val; |
---|
| 3933 | + |
---|
| 3934 | + if (optlen != sizeof(val)) |
---|
| 3935 | + return -EINVAL; |
---|
| 3936 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
| 3937 | + return -EFAULT; |
---|
| 3938 | + if (val < 0 || val > 1) |
---|
| 3939 | + return -EINVAL; |
---|
| 3940 | + |
---|
| 3941 | + po->prot_hook.ignore_outgoing = !!val; |
---|
| 3942 | + return 0; |
---|
| 3943 | + } |
---|
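A minimal userspace sketch of the new option (assumption: fd is a bound AF_PACKET socket). With it set, the socket no longer receives frames transmitted by the local host (pkt_type PACKET_OUTGOING):

```c
#include <sys/socket.h>
#include <linux/if_packet.h>

static int drop_outgoing_frames(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_PACKET, PACKET_IGNORE_OUTGOING,
			  &one, sizeof(one));
}
```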
3889 | 3944 | case PACKET_TX_HAS_OFF: |
---|
3890 | 3945 | { |
---|
3891 | 3946 | unsigned int val; |
---|
3892 | 3947 | |
---|
3893 | 3948 | if (optlen != sizeof(val)) |
---|
3894 | 3949 | return -EINVAL; |
---|
3895 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3950 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3896 | 3951 | return -EFAULT; |
---|
3897 | 3952 | |
---|
3898 | 3953 | lock_sock(sk); |
---|
.. | .. |
---|
3911 | 3966 | |
---|
3912 | 3967 | if (optlen != sizeof(val)) |
---|
3913 | 3968 | return -EINVAL; |
---|
3914 | | - if (copy_from_user(&val, optval, sizeof(val))) |
---|
| 3969 | + if (copy_from_sockptr(&val, optval, sizeof(val))) |
---|
3915 | 3970 | return -EFAULT; |
---|
3916 | 3971 | |
---|
3917 | 3972 | po->xmit = val ? packet_direct_xmit : dev_queue_xmit; |
---|
.. | .. |
---|
3932 | 3987 | void *data = &val; |
---|
3933 | 3988 | union tpacket_stats_u st; |
---|
3934 | 3989 | struct tpacket_rollover_stats rstats; |
---|
| 3990 | + int drops; |
---|
3935 | 3991 | |
---|
3936 | 3992 | if (level != SOL_PACKET) |
---|
3937 | 3993 | return -ENOPROTOOPT; |
---|
.. | .. |
---|
3948 | 4004 | memcpy(&st, &po->stats, sizeof(st)); |
---|
3949 | 4005 | memset(&po->stats, 0, sizeof(po->stats)); |
---|
3950 | 4006 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
| 4007 | + drops = atomic_xchg(&po->tp_drops, 0); |
---|
3951 | 4008 | |
---|
3952 | 4009 | if (po->tp_version == TPACKET_V3) { |
---|
3953 | 4010 | lv = sizeof(struct tpacket_stats_v3); |
---|
3954 | | - st.stats3.tp_packets += st.stats3.tp_drops; |
---|
| 4011 | + st.stats3.tp_drops = drops; |
---|
| 4012 | + st.stats3.tp_packets += drops; |
---|
3955 | 4013 | data = &st.stats3; |
---|
3956 | 4014 | } else { |
---|
3957 | 4015 | lv = sizeof(struct tpacket_stats); |
---|
3958 | | - st.stats1.tp_packets += st.stats1.tp_drops; |
---|
| 4016 | + st.stats1.tp_drops = drops; |
---|
| 4017 | + st.stats1.tp_packets += drops; |
---|
3959 | 4018 | data = &st.stats1; |
---|
3960 | 4019 | } |
---|
3961 | 4020 | |
---|
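For reference, a minimal userspace sketch of reading these counters (assumption: fd is a TPACKET_V1/V2 socket; V3 uses struct tpacket_stats_v3 instead). Both counters are zeroed on every read, and tp_packets includes the drops, as the code above shows:

```c
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <stdio.h>

static void print_packet_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("packets %u (drops %u)\n", st.tp_packets, st.tp_drops);
}
```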
.. | .. |
---|
4009 | 4068 | ((u32)po->fanout->flags << 24)) : |
---|
4010 | 4069 | 0); |
---|
4011 | 4070 | break; |
---|
| 4071 | + case PACKET_IGNORE_OUTGOING: |
---|
| 4072 | + val = po->prot_hook.ignore_outgoing; |
---|
| 4073 | + break; |
---|
4012 | 4074 | case PACKET_ROLLOVER_STATS: |
---|
4013 | 4075 | if (!po->rollover) |
---|
4014 | 4076 | return -EINVAL; |
---|
.. | .. |
---|
4037 | 4099 | return 0; |
---|
4038 | 4100 | } |
---|
4039 | 4101 | |
---|
4040 | | - |
---|
4041 | | -#ifdef CONFIG_COMPAT |
---|
4042 | | -static int compat_packet_setsockopt(struct socket *sock, int level, int optname, |
---|
4043 | | - char __user *optval, unsigned int optlen) |
---|
4044 | | -{ |
---|
4045 | | - struct packet_sock *po = pkt_sk(sock->sk); |
---|
4046 | | - |
---|
4047 | | - if (level != SOL_PACKET) |
---|
4048 | | - return -ENOPROTOOPT; |
---|
4049 | | - |
---|
4050 | | - if (optname == PACKET_FANOUT_DATA && |
---|
4051 | | - po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { |
---|
4052 | | - optval = (char __user *)get_compat_bpf_fprog(optval); |
---|
4053 | | - if (!optval) |
---|
4054 | | - return -EFAULT; |
---|
4055 | | - optlen = sizeof(struct sock_fprog); |
---|
4056 | | - } |
---|
4057 | | - |
---|
4058 | | - return packet_setsockopt(sock, level, optname, optval, optlen); |
---|
4059 | | -} |
---|
4060 | | -#endif |
---|
4061 | | - |
---|
4062 | 4102 | static int packet_notifier(struct notifier_block *this, |
---|
4063 | 4103 | unsigned long msg, void *ptr) |
---|
4064 | 4104 | { |
---|
.. | .. |
---|
4074 | 4114 | case NETDEV_UNREGISTER: |
---|
4075 | 4115 | if (po->mclist) |
---|
4076 | 4116 | packet_dev_mclist_delete(dev, &po->mclist); |
---|
4077 | | - /* fallthrough */ |
---|
| 4117 | + fallthrough; |
---|
4078 | 4118 | |
---|
4079 | 4119 | case NETDEV_DOWN: |
---|
4080 | 4120 | if (dev->ifindex == po->ifindex) { |
---|
.. | .. |
---|
4134 | 4174 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
4135 | 4175 | return put_user(amount, (int __user *)arg); |
---|
4136 | 4176 | } |
---|
4137 | | - case SIOCGSTAMP: |
---|
4138 | | - return sock_get_timestamp(sk, (struct timeval __user *)arg); |
---|
4139 | | - case SIOCGSTAMPNS: |
---|
4140 | | - return sock_get_timestampns(sk, (struct timespec __user *)arg); |
---|
4141 | | - |
---|
4142 | 4177 | #ifdef CONFIG_INET |
---|
4143 | 4178 | case SIOCADDRT: |
---|
4144 | 4179 | case SIOCDELRT: |
---|
.. | .. |
---|
4176 | 4211 | TP_STATUS_KERNEL)) |
---|
4177 | 4212 | mask |= EPOLLIN | EPOLLRDNORM; |
---|
4178 | 4213 | } |
---|
4179 | | - if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) |
---|
4180 | | - po->pressure = 0; |
---|
| 4214 | + packet_rcv_try_clear_pressure(po); |
---|
4181 | 4215 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
---|
4182 | 4216 | spin_lock_bh(&sk->sk_write_queue.lock); |
---|
4183 | 4217 | if (po->tx_ring.pg_vec) { |
---|
.. | .. |
---|
4296 | 4330 | struct packet_ring_buffer *rb; |
---|
4297 | 4331 | struct sk_buff_head *rb_queue; |
---|
4298 | 4332 | __be16 num; |
---|
4299 | | - int err = -EINVAL; |
---|
| 4333 | + int err; |
---|
4300 | 4334 | /* Added to avoid minimal code churn */ |
---|
4301 | 4335 | struct tpacket_req *req = &req_u->req; |
---|
4302 | 4336 | |
---|
.. | .. |
---|
4526 | 4560 | .getname = packet_getname_spkt, |
---|
4527 | 4561 | .poll = datagram_poll, |
---|
4528 | 4562 | .ioctl = packet_ioctl, |
---|
| 4563 | + .gettstamp = sock_gettstamp, |
---|
4529 | 4564 | .listen = sock_no_listen, |
---|
4530 | 4565 | .shutdown = sock_no_shutdown, |
---|
4531 | | - .setsockopt = sock_no_setsockopt, |
---|
4532 | | - .getsockopt = sock_no_getsockopt, |
---|
4533 | 4566 | .sendmsg = packet_sendmsg_spkt, |
---|
4534 | 4567 | .recvmsg = packet_recvmsg, |
---|
4535 | 4568 | .mmap = sock_no_mmap, |
---|
.. | .. |
---|
4547 | 4580 | .getname = packet_getname, |
---|
4548 | 4581 | .poll = packet_poll, |
---|
4549 | 4582 | .ioctl = packet_ioctl, |
---|
| 4583 | + .gettstamp = sock_gettstamp, |
---|
4550 | 4584 | .listen = sock_no_listen, |
---|
4551 | 4585 | .shutdown = sock_no_shutdown, |
---|
4552 | 4586 | .setsockopt = packet_setsockopt, |
---|
4553 | 4587 | .getsockopt = packet_getsockopt, |
---|
4554 | | -#ifdef CONFIG_COMPAT |
---|
4555 | | - .compat_setsockopt = compat_packet_setsockopt, |
---|
4556 | | -#endif |
---|
4557 | 4588 | .sendmsg = packet_sendmsg, |
---|
4558 | 4589 | .recvmsg = packet_recvmsg, |
---|
4559 | 4590 | .mmap = packet_mmap, |
---|
.. | .. |
---|
4630 | 4661 | mutex_init(&net->packet.sklist_lock); |
---|
4631 | 4662 | INIT_HLIST_HEAD(&net->packet.sklist); |
---|
4632 | 4663 | |
---|
| 4664 | +#ifdef CONFIG_PROC_FS |
---|
4633 | 4665 | if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, |
---|
4634 | 4666 | sizeof(struct seq_net_private))) |
---|
4635 | 4667 | return -ENOMEM; |
---|
| 4668 | +#endif /* CONFIG_PROC_FS */ |
---|
4636 | 4669 | |
---|
4637 | 4670 | return 0; |
---|
4638 | 4671 | } |
---|