@@ -119,20 +119,17 @@
 	return cpu;
 }
 
-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	*last_cpu = cpu;
 	return cpu;
 }
 
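To make the new wrap-around behaviour concrete, here is a small userspace sketch of the same selection logic. It is not kernel code: a plain bitmask stands in for cpu_online_mask, and the helper names (mask_next, mask_first, next_online) are illustrative only.

/* Userspace sketch, not kernel code: a plain bitmask stands in for
 * cpu_online_mask. All names here are illustrative.
 */
#include <stdio.h>

#define NR_CPUS 8

/* Return the next set bit strictly after 'prev', or NR_CPUS if none. */
static int mask_next(int prev, unsigned int mask)
{
	for (int cpu = prev + 1; cpu < NR_CPUS; ++cpu)
		if (mask & (1u << cpu))
			return cpu;
	return NR_CPUS;
}

/* Return the first set bit, mirroring cpumask_first(). */
static int mask_first(unsigned int mask)
{
	return mask_next(-1, mask);
}

/* Same shape as the new wg_cpumask_next_online(): advance past the last
 * choice and wrap to the first online CPU when we run off the end.
 */
static int next_online(int *last_cpu, unsigned int mask)
{
	int cpu = mask_next(*last_cpu, mask);

	if (cpu >= NR_CPUS)
		cpu = mask_first(mask);
	*last_cpu = cpu;
	return cpu;
}

int main(void)
{
	unsigned int online = 0x2d; /* CPUs 0, 2, 3 and 5 "online" */
	int last_cpu = -1;

	for (int i = 0; i < 8; ++i)
		printf("%d ", next_online(&last_cpu, online));
	printf("\n"); /* prints: 0 2 3 5 0 2 3 5 */
	return 0;
}

Running it prints 0 2 3 5 0 2 3 5: the cursor walks the online CPUs in order and wraps back to the first one when it runs off the end, which is the shape of the rewritten helper above.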
---|
@@ -161,7 +158,7 @@
 
 static inline int wg_queue_enqueue_per_device_and_peer(
 	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;
 
---|
@@ -175,7 +172,7 @@
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
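Since the enqueue helper now reads the cursor out of the device queue (&device_queue->last_cpu), the rest of the patch presumably adds an int last_cpu member to struct crypt_queue and drops the int *next_cpu argument at the call sites; those hunks are not shown here. Below is a minimal userspace model of that shape, with hypothetical names (crypt_queue_model, pick_cpu, enqueue_model), just to show why the extra parameter goes away.

/* Userspace model, not the kernel structs: the round-robin cursor lives
 * inside the per-device queue, so enqueue paths no longer need to thread an
 * extra int *next_cpu argument through. All names here are hypothetical.
 */
#include <stdio.h>

struct crypt_queue_model {
	/* ring and worker pointers elided */
	int last_cpu; /* round-robin cursor, mirroring the implied new member */
};

/* Stand-in for wg_cpumask_next_online() over a fixed set of 4 online CPUs. */
static int pick_cpu(int *last_cpu)
{
	*last_cpu = (*last_cpu + 1) % 4;
	return *last_cpu;
}

/* Stand-in for the enqueue helper: no next_cpu parameter, the cursor comes
 * from the queue itself.
 */
static int enqueue_model(struct crypt_queue_model *q)
{
	return pick_cpu(&q->last_cpu);
}

int main(void)
{
	struct crypt_queue_model q = { .last_cpu = -1 };

	for (int i = 0; i < 6; ++i)
		printf("%d ", enqueue_model(&q)); /* prints: 0 1 2 3 0 1 */
	printf("\n");
	return 0;
}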
---|