@@ ... @@
 struct wg_peer;
 struct multicore_worker;
 struct crypt_queue;
+struct prev_queue;
 struct sk_buff;

 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-			 bool multicore, unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+			 unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);

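For callers the change above is mechanical: wg_packet_queue_init() drops the multicore flag, since only the multi-worker device-wide queues use struct crypt_queue after this series, and wg_packet_queue_free() instead takes a purge flag saying whether skbs still sitting in the ring should be dropped. A hedged sketch of an updated call site follows; the queue, worker, and length here are placeholder names, not the ones used in device.c:

/* Illustrative call site only: "example_queue", "example_worker" and the
 * queue length are made-up names, not copied from the WireGuard sources. */
static struct crypt_queue example_queue;

static void example_worker(struct work_struct *work);

static int example_setup(void)
{
	/* Previously: wg_packet_queue_init(&example_queue, example_worker, true, 1024); */
	return wg_packet_queue_init(&example_queue, example_worker, 1024);
}

static void example_teardown(void)
{
	/* Previously: wg_packet_queue_free(&example_queue, true);
	 * The new flag asks for any still-queued skbs to be dropped. */
	wg_packet_queue_free(&example_queue, true);
}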
@@ ... @@
 	skb->dev = NULL;
 #ifdef CONFIG_NET_SCHED
 	skb->tc_index = 0;
-	skb_reset_tc(skb);
 #endif
+	skb_reset_redirect(skb);
 	skb->hdr_len = skb_headroom(skb);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 	skb_reset_inner_headers(skb);
 }

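Both changes in this hunk track skb API updates in newer kernels: skb_reset_tc() is replaced by skb_reset_redirect(), which no longer needs the CONFIG_NET_SCHED guard, and skb_probe_transport_header() lost its offset argument. For an out-of-tree build against an older kernel, a shim along the following lines would keep the new skb_reset_redirect() call site compiling; the version cutoff and the shim itself are illustrative assumptions, not the actual compat-layer contents:

#include <linux/skbuff.h>
#include <linux/version.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0)
/* Assumed shim: fall back to the old helper, guarded exactly the way the
 * removed call site above was. */
static inline void skb_reset_redirect(struct sk_buff *skb)
{
#ifdef CONFIG_NET_SCHED
	skb_reset_tc(skb);
#endif
}
#endif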
@@ ... @@
 	return cpu;
 }

-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	*last_cpu = cpu;
 	return cpu;
 }

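The rewritten helper is a plain round-robin over the online CPUs: cpumask_next() finds the next online CPU after the stored index, and when the result runs past nr_cpu_ids it wraps back to cpumask_first(). A hedged usage sketch follows; the static counter stands in for whatever per-queue field the caller keeps (later in this patch it is device_queue->last_cpu), and starting it at -1 makes the first call land on the first online CPU:

/* Illustrative only: spread a few work items across online CPUs using the
 * helper above. The worker array type and ->work member match the
 * declarations used elsewhere in this header. */
static void example_spread(struct workqueue_struct *wq,
			   struct multicore_worker __percpu *workers)
{
	static int last_cpu = -1;	/* stands in for device_queue->last_cpu */
	int i;

	for (i = 0; i < 4; ++i) {
		int cpu = wg_cpumask_next_online(&last_cpu);

		queue_work_on(cpu, wq, &per_cpu_ptr(workers, cpu)->work);
	}
}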
+void wg_prev_queue_init(struct prev_queue *queue);
+
+/* Multi producer */
+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
+
+/* Single consumer */
+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
+
+/* Single consumer */
+static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
+{
+	if (queue->peeked)
+		return queue->peeked;
+	queue->peeked = wg_prev_queue_dequeue(queue);
+	return queue->peeked;
+}
+
+/* Single consumer */
+static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+{
+	queue->peeked = NULL;
+}
+
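wg_prev_queue is the new multi-producer, single-consumer queue that replaces the per-peer ptr_rings. The peek/drop_peeked pair exists because the single consumer may look at the head packet, find that the crypto workers have not finished with it yet, and come back later without losing its place. A hedged sketch of the consumer pattern; PACKET_CB() and the packet_state values come from elsewhere in this header, while the worker shape itself is illustrative rather than copied from tx.c or rx.c:

/* Illustrative single-consumer loop (e.g. a per-peer work item): consume the
 * head of the queue only once its state has moved past UNCRYPTED. */
static void example_consume(struct prev_queue *queue)
{
	struct sk_buff *skb;

	while ((skb = wg_prev_queue_peek(queue)) != NULL) {
		enum packet_state state =
			atomic_read_acquire(&PACKET_CB(skb)->state);

		if (state == PACKET_STATE_UNCRYPTED)
			break;	/* not ready; stays peeked for the next pass */

		wg_prev_queue_drop_peeked(queue);
		if (state != PACKET_STATE_CRYPTED) {	/* PACKET_STATE_DEAD */
			kfree_skb(skb);
			continue;
		}
		/* ... transmit the finished skb, or hand it to napi ... */
	}
}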
 static inline int wg_queue_enqueue_per_device_and_peer(
-	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;

@@ ... @@
 	/* We first queue this up for the peer ingestion, but the consumer
 	 * will wait for the state to change to CRYPTED or DEAD before.
 	 */
-	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+	if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
 		return -ENOSPC;
+
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
 	return 0;
 }

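The two return codes are worth distinguishing. -ENOSPC means the skb never made it into the peer queue, so the caller still owns it and can simply drop it. -EPIPE means the skb is already in the peer queue but the device queue refused it, so the caller must not free it directly; the existing callers instead mark it PACKET_STATE_DEAD via the per-peer tx/rx helpers below so the consumer disposes of it. A hedged caller sketch; the field names (encrypt_queue, tx_packet_queue, packet_crypt_wq) match what this series uses elsewhere in the tree, but treat them as assumptions here:

/* Illustrative producer, roughly the shape of the tx path's call site. */
static void example_send(struct wg_device *wg, struct wg_peer *peer,
			 struct sk_buff *skb)
{
	int ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
						       &peer->tx_packet_queue,
						       skb, wg->packet_crypt_wq);

	if (ret == -ENOSPC)
		kfree_skb(skb);		/* never enqueued anywhere */
	else if (ret == -EPIPE)
		wg_queue_enqueue_per_peer_tx(skb, PACKET_STATE_DEAD);
}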
-static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
-					     struct sk_buff *skb,
-					     enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
@@ ... @@
 	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));

 	atomic_set_release(&PACKET_CB(skb)->state, state);
-	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
-					       peer->internal_id),
-		      peer->device->packet_crypt_wq, &queue->work);
+	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
+		      peer->device->packet_crypt_wq, &peer->transmit_packet_work);
 	wg_peer_put(peer);
 }

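With the per-peer rings gone, the completion side no longer receives a queue pointer: publishing the state with atomic_set_release() and kicking peer->transmit_packet_work is all that is needed, and that work item then walks the prev_queue with the peek/drop helpers above. A hedged sketch of the producer half; the real version of this lives in the encrypt worker in tx.c:

/* Illustrative: what a device-queue encrypt worker does with each finished
 * skb, whether or not encryption succeeded. */
static void example_encrypt_done(struct sk_buff *skb, bool encrypted_ok)
{
	wg_queue_enqueue_per_peer_tx(skb, encrypted_ok ? PACKET_STATE_CRYPTED
							: PACKET_STATE_DEAD);
}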
-static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
-						  enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.