From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/net/packet/af_packet.c | 562 ++++++++++++++++++++++++++++++-------------------------
1 files changed, 304 insertions(+), 258 deletions(-)
diff --git a/kernel/net/packet/af_packet.c b/kernel/net/packet/af_packet.c
index 51bca56..bbdb32a 100644
--- a/kernel/net/packet/af_packet.c
+++ b/kernel/net/packet/af_packet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -43,13 +44,6 @@
* Chetan Loke : Implemented TPACKET_V3 block abstraction
* layer.
* Copyright (C) 2011, <lokec@ccs.neu.edu>
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/types.h>
@@ -63,7 +57,6 @@
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
-#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -100,52 +93,56 @@
/*
Assumptions:
- - if device has no dev->hard_header routine, it adds and removes ll header
- inside itself. In this case ll header is invisible outside of device,
- but higher levels still should reserve dev->hard_header_len.
- Some devices are enough clever to reallocate skb, when header
- will not fit to reserved space (tunnel), another ones are silly
- (PPP).
+ - If the device has no dev->header_ops->create, there is no LL header
+ visible above the device. In this case, its hard_header_len should be 0.
+ The device may prepend its own header internally. In this case, its
+ needed_headroom should be set to the space needed for it to add its
+ internal header.
+ For example, a WiFi driver pretending to be an Ethernet driver should
+ set its hard_header_len to be the Ethernet header length, and set its
+ needed_headroom to be (the real WiFi header length - the fake Ethernet
+ header length).
- packet socket receives packets with pulled ll header,
so that SOCK_RAW should push it back.
On receive:
-----------
-Incoming, dev->hard_header!=NULL
+Incoming, dev_has_header(dev) == true
mac_header -> ll header
data -> data
-Outgoing, dev->hard_header!=NULL
+Outgoing, dev_has_header(dev) == true
mac_header -> ll header
data -> ll header
-Incoming, dev->hard_header==NULL
- mac_header -> UNKNOWN position. It is very likely, that it points to ll
- header. PPP makes it, that is wrong, because introduce
- assymetry between rx and tx paths.
+Incoming, dev_has_header(dev) == false
+ mac_header -> data
+ However drivers often make it point to the ll header.
+ This is incorrect because the ll header should be invisible to us.
data -> data
-Outgoing, dev->hard_header==NULL
- mac_header -> data. ll header is still not built!
+Outgoing, dev_has_header(dev) == false
+ mac_header -> data. ll header is invisible to us.
data -> data
Resume
- If dev->hard_header==NULL we are unlikely to restore sensible ll header.
+ If dev_has_header(dev) == false we are unable to restore the ll header,
+ because it is invisible to us.
On transmit:
------------
-dev->hard_header != NULL
+dev->header_ops != NULL
mac_header -> ll header
data -> ll header
-dev->hard_header == NULL (ll header is added by device, we cannot control it)
+dev->header_ops == NULL (ll header is invisible to us)
mac_header -> data
data -> data
- We should set nh.raw on output to correct posistion,
+ We should set network_header on output to the correct position,
packet classifier depends on it.
*/
@@ -184,7 +181,6 @@
#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
-#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -273,27 +269,26 @@
static bool packet_use_direct_xmit(const struct packet_sock *po)
{
- return po->xmit == packet_direct_xmit;
-}
-
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ return READ_ONCE(po->xmit) == packet_direct_xmit;
}
static u16 packet_pick_tx_queue(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
const struct net_device_ops *ops = dev->netdev_ops;
+ int cpu = raw_smp_processor_id();
u16 queue_index;
+#ifdef CONFIG_XPS
+ skb->sender_cpu = cpu + 1;
+#endif
+ skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
if (ops->ndo_select_queue) {
- queue_index = ops->ndo_select_queue(dev, skb, NULL,
- __packet_pick_tx_queue);
+ queue_index = ops->ndo_select_queue(dev, skb, NULL);
queue_index = netdev_cap_txqueue(dev, queue_index);
} else {
- queue_index = __packet_pick_tx_queue(dev, skb, NULL);
+ queue_index = netdev_pick_tx(dev, skb, NULL);
}
return queue_index;
@@ -371,18 +366,20 @@
{
union tpacket_uhdr h;
+ /* The WRITE_ONCE() stores pair with READ_ONCE() in __packet_get_status() */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -393,23 +390,25 @@
smp_wmb();
}
-static int __packet_get_status(struct packet_sock *po, void *frame)
+static int __packet_get_status(const struct packet_sock *po, void *frame)
{
union tpacket_uhdr h;
smp_rmb();
+ /* The READ_ONCE() loads pair with WRITE_ONCE() in __packet_set_status() */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
@@ -417,17 +416,18 @@
}
}
-static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
unsigned int flags)
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
if (shhwtstamps &&
(flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
+ ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
return TP_STATUS_TS_RAW_HARDWARE;
- if (ktime_to_timespec_cond(skb->tstamp, ts))
+ if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec64_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -437,13 +437,20 @@
struct sk_buff *skb)
{
union tpacket_uhdr h;
- struct timespec ts;
+ struct timespec64 ts;
__u32 ts_status;
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
return 0;
h.raw = frame;
+ /*
+ * versions 1 through 3 overflow the timestamps in y2106, since they
+ * all store the seconds in a 32-bit unsigned integer.
+ * If we create a version 4, that should have a 64-bit timestamp,
+ * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
+ * nanoseconds.
+ */
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_sec = ts.tv_sec;
@@ -469,10 +476,10 @@
return ts_status;
}
-static void *packet_lookup_frame(struct packet_sock *po,
- struct packet_ring_buffer *rb,
- unsigned int position,
- int status)
+static void *packet_lookup_frame(const struct packet_sock *po,
+ const struct packet_ring_buffer *rb,
+ unsigned int position,
+ int status)
{
unsigned int pg_vec_pos, frame_offset;
union tpacket_uhdr h;
@@ -529,7 +536,7 @@
int blk_size_in_bytes)
{
struct net_device *dev;
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+ unsigned int mbits, div;
struct ethtool_link_ksettings ecmd;
int err;
@@ -541,31 +548,25 @@
}
err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
- if (!err) {
- /*
- * If the link speed is so slow you don't really
- * need to worry about perf anyways
- */
- if (ecmd.base.speed < SPEED_1000 ||
- ecmd.base.speed == SPEED_UNKNOWN) {
- return DEFAULT_PRB_RETIRE_TOV;
- } else {
- msec = 1;
- div = ecmd.base.speed / 1000;
- }
- } else
+ if (err)
return DEFAULT_PRB_RETIRE_TOV;
+ /* On links slower than 1 Gbit/s, or of unknown speed, performance is
+ * not a concern, so just use the default retire timeout.
+ */
+ if (ecmd.base.speed < SPEED_1000 ||
+ ecmd.base.speed == SPEED_UNKNOWN)
+ return DEFAULT_PRB_RETIRE_TOV;
+
+ div = ecmd.base.speed / 1000;
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
if (div)
mbits /= div;
- tmo = mbits * msec;
-
if (div)
- return tmo+1;
- return tmo;
+ return mbits + 1;
+ return mbits;
}
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
@@ -601,6 +602,7 @@
req_u->req3.tp_block_size);
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+ rwlock_init(&p1->blk_fill_in_prog_lock);
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
@@ -667,10 +669,9 @@
*
*/
if (BLOCK_NUM_PKTS(pbd)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_chill();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
@@ -768,7 +769,7 @@
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
struct sock *sk = &po->sk;
- if (po->stats.stats3.tp_drops)
+ if (atomic_read(&po->tp_drops))
status |= TP_STATUS_LOSING;
last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -784,8 +785,8 @@
* It shouldn't really happen as we don't close empty
* blocks. See prb_retire_rx_blk_timer_expired().
*/
- struct timespec ts;
- getnstimeofday(&ts);
+ struct timespec64 ts;
+ ktime_get_real_ts64(&ts);
h1->ts_last_pkt.ts_sec = ts.tv_sec;
h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
}
@@ -815,7 +816,7 @@
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
struct tpacket_block_desc *pbd1)
{
- struct timespec ts;
+ struct timespec64 ts;
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
smp_rmb();
@@ -828,7 +829,7 @@
BLOCK_NUM_PKTS(pbd1) = 0;
BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- getnstimeofday(&ts);
+ ktime_get_real_ts64(&ts);
h1->ts_first_pkt.ts_sec = ts.tv_sec;
h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
@@ -929,10 +930,9 @@
* the timer-handler already handled this case.
*/
if (!(status & TP_STATUS_BLK_TMO)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_chill();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
prb_close_block(pkc, pbd, po, status);
return;
@@ -953,7 +953,8 @@
__releases(&pkc->blk_fill_in_prog_lock)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
- atomic_dec(&pkc->blk_fill_in_prog);
+
+ read_unlock(&pkc->blk_fill_in_prog_lock);
}
static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
@@ -1008,14 +1009,13 @@
pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_NUM_PKTS(pbd) += 1;
- atomic_inc(&pkc->blk_fill_in_prog);
+ read_lock(&pkc->blk_fill_in_prog_lock);
prb_run_all_ft_ops(pkc, ppd);
}
/* Assumes caller has the sk->rx_queue.lock */
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
struct sk_buff *skb,
- int status,
unsigned int len
)
{
@@ -1087,7 +1087,7 @@
po->rx_ring.head, status);
return curr;
case TPACKET_V3:
- return __packet_lookup_frame_in_block(po, skb, status, len);
+ return __packet_lookup_frame_in_block(po, skb, len);
default:
WARN(1, "TPACKET version not supported\n");
BUG();
@@ -1095,10 +1095,10 @@
}
}
-static void *prb_lookup_block(struct packet_sock *po,
- struct packet_ring_buffer *rb,
- unsigned int idx,
- int status)
+static void *prb_lookup_block(const struct packet_sock *po,
+ const struct packet_ring_buffer *rb,
+ unsigned int idx,
+ int status)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
@@ -1211,12 +1211,12 @@
#define ROOM_LOW 0x1
#define ROOM_NORMAL 0x2
-static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
{
int idx, len;
- len = po->rx_ring.frame_max + 1;
- idx = po->rx_ring.head;
+ len = READ_ONCE(po->rx_ring.frame_max) + 1;
+ idx = READ_ONCE(po->rx_ring.head);
if (pow_off)
idx += len >> pow_off;
if (idx >= len)
@@ -1224,12 +1224,12 @@
return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}
-static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
{
int idx, len;
- len = po->rx_ring.prb_bdqc.knum_blocks;
- idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+ len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
+ idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
if (pow_off)
idx += len >> pow_off;
if (idx >= len)
@@ -1237,15 +1237,18 @@
return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}
-static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+static int __packet_rcv_has_room(const struct packet_sock *po,
+ const struct sk_buff *skb)
{
- struct sock *sk = &po->sk;
+ const struct sock *sk = &po->sk;
int ret = ROOM_NONE;
if (po->prot_hook.func != tpacket_rcv) {
- int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
- - (skb ? skb->truesize : 0);
- if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+ int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+ - (skb ? skb->truesize : 0);
+
+ if (avail > (rcvbuf >> ROOM_POW_OFF))
return ROOM_NORMAL;
else if (avail > 0)
return ROOM_LOW;
@@ -1270,17 +1273,22 @@
static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
- int ret;
- bool has_room;
+ int pressure, ret;
- spin_lock_bh(&po->sk.sk_receive_queue.lock);
ret = __packet_rcv_has_room(po, skb);
- has_room = ret == ROOM_NORMAL;
- if (po->pressure == has_room)
- po->pressure = !has_room;
- spin_unlock_bh(&po->sk.sk_receive_queue.lock);
+ pressure = ret != ROOM_NORMAL;
+
+ if (READ_ONCE(po->pressure) != pressure)
+ WRITE_ONCE(po->pressure, pressure);
return ret;
+}
+
+static void packet_rcv_try_clear_pressure(struct packet_sock *po)
+{ /* Reset po->pressure to 0 once the socket reports ROOM_NORMAL again. */
+ if (READ_ONCE(po->pressure) && /* nothing to do unless pressure is set */
+ __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) /* NULL skb: no truesize is charged in the sk_rcvbuf check */
+ WRITE_ONCE(po->pressure, 0);
}
static void packet_sock_destruct(struct sock *sk)
@@ -1356,7 +1364,7 @@
struct packet_sock *po, *po_next, *po_skip = NULL;
unsigned int i, j, room = ROOM_NONE;
- po = pkt_sk(f->arr[idx]);
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
if (try_self) {
room = packet_rcv_has_room(po, skb);
@@ -1368,8 +1376,8 @@
i = j = min_t(int, po->rollover->sock, num - 1);
do {
- po_next = pkt_sk(f->arr[i]);
- if (po_next != po_skip && !po_next->pressure &&
+ po_next = pkt_sk(rcu_dereference(f->arr[i]));
+ if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
po->rollover->sock = i;
@@ -1463,7 +1471,7 @@
if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
idx = fanout_demux_rollover(f, skb, idx, true, num);
- po = pkt_sk(f->arr[idx]);
+ po = pkt_sk(rcu_dereference(f->arr[idx]));
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
@@ -1477,7 +1485,7 @@
struct packet_fanout *f = po->fanout;
spin_lock(&f->lock);
- f->arr[f->num_members] = sk;
+ rcu_assign_pointer(f->arr[f->num_members], sk);
smp_wmb();
f->num_members++;
if (f->num_members == 1)
@@ -1492,11 +1500,14 @@
spin_lock(&f->lock);
for (i = 0; i < f->num_members; i++) {
- if (f->arr[i] == sk)
+ if (rcu_dereference_protected(f->arr[i],
+ lockdep_is_held(&f->lock)) == sk)
break;
}
BUG_ON(i >= f->num_members);
- f->arr[i] = f->arr[f->num_members - 1];
+ rcu_assign_pointer(f->arr[i],
+ rcu_dereference_protected(f->arr[f->num_members - 1],
+ lockdep_is_held(&f->lock)));
f->num_members--;
if (f->num_members == 0)
__dev_remove_pack(&f->prot_hook);
@@ -1539,7 +1550,7 @@
}
}
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1548,10 +1559,10 @@
if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
return -EPERM;
- if (len != sizeof(fprog))
- return -EINVAL;
- if (copy_from_user(&fprog, data, len))
- return -EFAULT;
+
+ ret = copy_bpf_fprog_from_user(&fprog, data, len);
+ if (ret)
+ return ret;
ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
if (ret)
@@ -1561,7 +1572,7 @@
return 0;
}
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1571,7 +1582,7 @@
return -EPERM;
if (len != sizeof(fd))
return -EINVAL;
- if (copy_from_user(&fd, data, len))
+ if (copy_from_sockptr(&fd, data, len))
return -EFAULT;
new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
@@ -1582,7 +1593,7 @@
return 0;
}
-static int fanout_set_data(struct packet_sock *po, char __user *data,
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
switch (po->fanout->type) {
@@ -1634,13 +1645,15 @@
return false;
}
-static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
+static int fanout_add(struct sock *sk, struct fanout_args *args)
{
struct packet_rollover *rollover = NULL;
struct packet_sock *po = pkt_sk(sk);
+ u16 type_flags = args->type_flags;
struct packet_fanout *f, *match;
u8 type = type_flags & 0xff;
u8 flags = type_flags >> 8;
+ u16 id = args->id;
int err;
switch (type) {
@@ -1698,11 +1711,21 @@
}
}
err = -EINVAL;
- if (match && match->flags != flags)
- goto out;
- if (!match) {
+ if (match) {
+ if (match->flags != flags)
+ goto out;
+ if (args->max_num_members &&
+ args->max_num_members != match->max_num_members)
+ goto out;
+ } else {
+ if (args->max_num_members > PACKET_FANOUT_MAX)
+ goto out;
+ if (!args->max_num_members)
+ /* legacy PACKET_FANOUT_MAX */
+ args->max_num_members = 256;
err = -ENOMEM;
- match = kzalloc(sizeof(*match), GFP_KERNEL);
+ match = kvzalloc(struct_size(match, arr, args->max_num_members),
+ GFP_KERNEL);
if (!match)
goto out;
write_pnet(&match->net, sock_net(sk));
@@ -1719,6 +1742,7 @@
match->prot_hook.af_packet_priv = match;
match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
+ match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
}
err = -EINVAL;
@@ -1729,7 +1753,7 @@
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
- if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+ if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
@@ -1746,7 +1770,7 @@
if (err && !refcount_read(&match->sk_ref)) {
list_del(&match->list);
- kfree(match);
+ kvfree(match);
}
out:
@@ -1836,7 +1860,7 @@
skb_dst_drop(skb);
/* drop conntrack reference */
- nf_reset(skb);
+ nf_reset_ct(skb);
spkt = &PACKET_SKB_CB(skb)->sa.pkt;
@@ -1864,6 +1888,24 @@
return 0;
}
+static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
+{ /* Fix up skb->protocol and the network/transport header offsets of @skb. */
+ int depth; /* passed by address to vlan_get_protocol_and_depth(); presumably receives the VLAN header depth */
+
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && /* protocol unset or ETH_P_ALL */
+ sock->type == SOCK_RAW) { /* only SOCK_RAW sockets take this branch */
+ skb_reset_mac_header(skb);
+ skb->protocol = dev_parse_header_protocol(skb); /* replace it with what the device header parser returns */
+ }
+
+ /* Move network header to the right position for VLAN tagged packets */
+ if (likely(skb->dev->type == ARPHRD_ETHER) &&
+ eth_type_vlan(skb->protocol) &&
+ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) /* skipped when the helper returns 0 */
+ skb_set_network_header(skb, depth);
+
+ skb_probe_transport_header(skb); /* runs unconditionally, after the fix-ups above */
+}
/*
* Output a raw packet to a device layer. This bypasses all the other
@@ -1956,7 +1998,7 @@
goto retry;
}
- if (!dev_validate_header(dev, skb->data, len)) {
+ if (!dev_validate_header(dev, skb->data, len) || !skb->len) {
err = -EINVAL;
goto out_unlock;
}
@@ -1979,12 +2021,12 @@
skb->mark = sk->sk_mark;
skb->tstamp = sockc.transmit_time;
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
- skb_probe_transport_header(skb, 0);
+ packet_parse_headers(skb, sock);
dev_queue_xmit(skb);
rcu_read_unlock();
@@ -2061,7 +2103,7 @@
skb->dev = dev;
- if (dev->header_ops) {
+ if (dev_has_header(dev)) {
/* The device has an explicit notion of ll header,
* exported to higher levels.
*
@@ -2106,7 +2148,7 @@
sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_hatype = dev->type;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2126,7 +2168,7 @@
skb_dst_drop(skb);
/* drop conntrack reference */
- nf_reset(skb);
+ nf_reset_ct(skb);
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
@@ -2138,10 +2180,8 @@
drop_n_acct:
is_drop_n_account = true;
- spin_lock(&sk->sk_receive_queue.lock);
- po->stats.stats1.tp_drops++;
+ atomic_inc(&po->tp_drops);
atomic_inc(&sk->sk_drops);
- spin_unlock(&sk->sk_receive_queue.lock);
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2170,7 +2210,7 @@
unsigned short macoff, hdrlen;
unsigned int netoff;
struct sk_buff *copy_skb = NULL;
- struct timespec ts;
+ struct timespec64 ts;
__u32 ts_status;
bool is_drop_n_account = false;
unsigned int slot_id = 0;
@@ -2192,7 +2232,7 @@
if (!net_eq(dev_net(dev), sock_net(sk)))
goto drop;
- if (dev->header_ops) {
+ if (dev_has_header(dev)) {
if (sk->sk_type != SOCK_DGRAM)
skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
@@ -2207,11 +2247,16 @@
if (!res)
goto drop_n_restore;
+ /* If we are flooded, just give up */
+ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
+ atomic_inc(&po->tp_drops);
+ goto drop_n_restore;
+ }
+
if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
if (snaplen > res)
@@ -2232,9 +2277,7 @@
macoff = netoff - maclen;
}
if (netoff > USHRT_MAX) {
- spin_lock(&sk->sk_receive_queue.lock);
- po->stats.stats1.tp_drops++;
- spin_unlock(&sk->sk_receive_queue.lock);
+ atomic_inc(&po->tp_drops);
goto drop_n_restore;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2247,8 +2290,11 @@
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -2300,7 +2346,7 @@
* Anyways, moving it for V1/V2 only as V3 doesn't need this
* at packet level.
*/
- if (po->stats.stats1.tp_drops)
+ if (atomic_read(&po->tp_drops))
status |= TP_STATUS_LOSING;
}
@@ -2313,8 +2359,13 @@
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
- getnstimeofday(&ts);
+ /* Always timestamp; prefer an existing software timestamp taken
+ * closer to the time of capture.
+ */
+ ts_status = tpacket_get_timestamp(skb, &ts,
+ po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+ if (!ts_status)
+ ktime_get_real_ts64(&ts);
status |= ts_status;
@@ -2370,7 +2421,7 @@
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2413,9 +2464,9 @@
return 0;
drop_n_account:
- is_drop_n_account = true;
- po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
+ atomic_inc(&po->tp_drops);
+ is_drop_n_account = true;
sk->sk_data_ready(sk);
kfree_skb(copy_skb);
@@ -2441,15 +2492,6 @@
}
sock_wfree(skb);
-}
-
-static void tpacket_set_protocol(const struct net_device *dev,
- struct sk_buff *skb)
-{
- if (dev->type == ARPHRD_ETHER) {
- skb_reset_mac_header(skb);
- skb->protocol = eth_hdr(skb)->h_proto;
- }
}
static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
@@ -2499,7 +2541,7 @@
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
- sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
@@ -2522,8 +2564,6 @@
return err;
if (!dev_validate_header(dev, skb->data, hdrlen))
return -EINVAL;
- if (!skb->protocol)
- tpacket_set_protocol(dev, skb);
data += hdrlen;
to_write -= hdrlen;
@@ -2558,7 +2598,7 @@
len = ((to_write > len_max) ? len_max : to_write);
}
- skb_probe_transport_header(skb, 0);
+ packet_parse_headers(skb, sock);
return tp_len;
}
@@ -2788,9 +2828,11 @@
packet_inc_pending(&po->tx_ring);
status = TP_STATUS_SEND_REQUEST;
- err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ err = READ_ONCE(po->xmit)(skb);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -2957,13 +2999,13 @@
if (err)
goto out_free;
- if (sock->type == SOCK_RAW &&
- !dev_validate_header(dev, skb->data, len)) {
+ if ((sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
err = -EINVAL;
goto out_free;
}
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
@@ -2977,6 +3019,11 @@
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
+ if (unlikely(extra_len == 4))
+ skb->no_fcs = 1;
+
+ packet_parse_headers(skb, sock);
+
if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
@@ -2985,14 +3032,14 @@
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- skb_probe_transport_header(skb, reserve);
-
- if (unlikely(extra_len == 4))
- skb->no_fcs = 1;
-
- err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ err = READ_ONCE(po->xmit)(skb);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
@@ -3012,10 +3059,13 @@
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec)
+ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+ * tpacket_snd() will redo the check safely.
+ */
+ if (data_race(po->tx_ring.pg_vec))
return tpacket_snd(po, msg);
- else
- return packet_snd(sock, msg, len);
+
+ return packet_snd(sock, msg, len);
}
/*
@@ -3076,7 +3126,7 @@
kfree(po->rollover);
if (f) {
fanout_release_data(f);
- kfree(f);
+ kvfree(f);
}
/*
* Now the socket is dead. No more input will appear.
@@ -3111,6 +3161,9 @@
lock_sock(sk);
spin_lock(&po->bind_lock);
+ if (!proto)
+ proto = po->num;
+
rcu_read_lock();
if (po->fanout) {
@@ -3213,7 +3266,7 @@
memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
name[sizeof(uaddr->sa_data)] = 0;
- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
+ return packet_do_bind(sk, name, 0, 0);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -3230,8 +3283,7 @@
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- return packet_do_bind(sk, NULL, sll->sll_ifindex,
- sll->sll_protocol ? : pkt_sk(sk)->num);
+ return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
}
static struct proto packet_proto = {
@@ -3371,8 +3423,7 @@
if (skb == NULL)
goto out;
- if (pkt_sk(sk)->pressure)
- packet_rcv_has_room(pkt_sk(sk), NULL);
+ packet_rcv_try_clear_pressure(pkt_sk(sk));
if (pkt_sk(sk)->has_vnet_hdr) {
err = packet_rcv_vnet(msg, skb, &len);
@@ -3407,6 +3458,8 @@
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3429,18 +3482,21 @@
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
- if (pkt_sk(sk)->auxdata) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
struct tpacket_auxdata aux;
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
aux.tp_len = origlen;
@@ -3670,7 +3726,8 @@
}
static int
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
@@ -3690,7 +3747,7 @@
return -EINVAL;
if (len > sizeof(mreq))
len = sizeof(mreq);
- if (copy_from_user(&mreq, optval, len))
+ if (copy_from_sockptr(&mreq, optval, len))
return -EFAULT;
if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
return -EINVAL;
@@ -3721,7 +3778,7 @@
if (optlen < len) {
ret = -EINVAL;
} else {
- if (copy_from_user(&req_u.req, optval, len))
+ if (copy_from_sockptr(&req_u.req, optval, len))
ret = -EFAULT;
else
ret = packet_set_ring(sk, &req_u, 0,
@@ -3736,7 +3793,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
pkt_sk(sk)->copy_thresh = val;
@@ -3748,7 +3805,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -3774,7 +3831,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val > INT_MAX)
return -EINVAL;
@@ -3794,7 +3851,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3813,12 +3870,10 @@
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->auxdata = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
return 0;
}
case PACKET_ORIGDEV:
@@ -3827,12 +3882,10 @@
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->origdev = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
return 0;
}
case PACKET_VNET_HDR:
@@ -3843,7 +3896,7 @@
return -EINVAL;
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3862,7 +3915,7 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->tp_tstamp = val;
@@ -3870,14 +3923,14 @@
}
case PACKET_FANOUT:
{
- int val;
+ struct fanout_args args = { 0 };
- if (optlen != sizeof(val))
+ if (optlen != sizeof(int) && optlen != sizeof(args))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&args, optval, optlen))
return -EFAULT;
- return fanout_add(sk, val & 0xffff, val >> 16);
+ return fanout_add(sk, &args);
}
case PACKET_FANOUT_DATA:
{
@@ -3887,13 +3940,27 @@
return fanout_set_data(po, optval, optlen);
}
+ case PACKET_IGNORE_OUTGOING:
+ {
+ int val;
+
+ if (optlen != sizeof(val))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+
+ po->prot_hook.ignore_outgoing = !!val;
+ return 0;
+ }
case PACKET_TX_HAS_OFF:
{
unsigned int val;
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3912,10 +3979,11 @@
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
+ /* Paired with all lockless reads of po->xmit */
+ WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit);
return 0;
}
default:
@@ -3933,6 +4001,7 @@
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ int drops;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3949,23 +4018,26 @@
memcpy(&st, &po->stats, sizeof(st));
memset(&po->stats, 0, sizeof(po->stats));
spin_unlock_bh(&sk->sk_receive_queue.lock);
+ drops = atomic_xchg(&po->tp_drops, 0);
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
- st.stats3.tp_packets += st.stats3.tp_drops;
+ st.stats3.tp_drops = drops;
+ st.stats3.tp_packets += drops;
data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
- st.stats1.tp_packets += st.stats1.tp_drops;
+ st.stats1.tp_drops = drops;
+ st.stats1.tp_packets += drops;
data = &st.stats1;
}
break;
case PACKET_AUXDATA:
- val = po->auxdata;
+ val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
break;
case PACKET_ORIGDEV:
- val = po->origdev;
+ val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
break;
case PACKET_VNET_HDR:
val = po->has_vnet_hdr;
@@ -4010,6 +4082,9 @@
((u32)po->fanout->flags << 24)) :
0);
break;
+ case PACKET_IGNORE_OUTGOING:
+ val = po->prot_hook.ignore_outgoing;
+ break;
case PACKET_ROLLOVER_STATS:
if (!po->rollover)
return -EINVAL;
@@ -4038,28 +4113,6 @@
return 0;
}
-
-#ifdef CONFIG_COMPAT
-static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct packet_sock *po = pkt_sk(sock->sk);
-
- if (level != SOL_PACKET)
- return -ENOPROTOOPT;
-
- if (optname == PACKET_FANOUT_DATA &&
- po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
- optval = (char __user *)get_compat_bpf_fprog(optval);
- if (!optval)
- return -EFAULT;
- optlen = sizeof(struct sock_fprog);
- }
-
- return packet_setsockopt(sock, level, optname, optval, optlen);
-}
-#endif
-
static int packet_notifier(struct notifier_block *this,
unsigned long msg, void *ptr)
{
@@ -4075,7 +4128,7 @@
case NETDEV_UNREGISTER:
if (po->mclist)
packet_dev_mclist_delete(dev, &po->mclist);
- /* fallthrough */
+ fallthrough;
case NETDEV_DOWN:
if (dev->ifindex == po->ifindex) {
@@ -4135,11 +4188,6 @@
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
- case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval __user *)arg);
- case SIOCGSTAMPNS:
- return sock_get_timestampns(sk, (struct timespec __user *)arg);
-
#ifdef CONFIG_INET
case SIOCADDRT:
case SIOCDELRT:
@@ -4177,8 +4225,7 @@
TP_STATUS_KERNEL))
mask |= EPOLLIN | EPOLLRDNORM;
}
- if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
- po->pressure = 0;
+ packet_rcv_try_clear_pressure(po);
spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
@@ -4297,7 +4344,7 @@
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
__be16 num;
- int err = -EINVAL;
+ int err;
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
@@ -4527,10 +4574,9 @@
.getname = packet_getname_spkt,
.poll = datagram_poll,
.ioctl = packet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = packet_sendmsg_spkt,
.recvmsg = packet_recvmsg,
.mmap = sock_no_mmap,
@@ -4548,13 +4594,11 @@
.getname = packet_getname,
.poll = packet_poll,
.ioctl = packet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
.getsockopt = packet_getsockopt,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_packet_setsockopt,
-#endif
.sendmsg = packet_sendmsg,
.recvmsg = packet_recvmsg,
.mmap = packet_mmap,
@@ -4631,9 +4675,11 @@
mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
+#ifdef CONFIG_PROC_FS
if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
sizeof(struct seq_net_private)))
return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
return 0;
}
--
Gitblit v1.6.2