```diff
@@ -119,20 +119,17 @@
 	return cpu;
 }
 
-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	*last_cpu = cpu;
 	return cpu;
 }
 
```
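For reference, a minimal userspace sketch of the new wrap-around selection follows. Everything in it is illustrative: the 8-bit `online_mask`, `NR_CPUS`, and the helpers `next_after()`, `first_online()` and `pick_next_online()` are stand-ins for `cpu_online_mask`, `nr_cpu_ids`, `cpumask_next()` and `cpumask_first()`, not the kernel APIs used above.

```c
/* Userspace sketch only: models the patched wg_cpumask_next_online() with a
 * made-up bitmask of "online CPUs" instead of cpu_online_mask. */
#include <stdio.h>

#define NR_CPUS 8
static unsigned int online_mask = 0x2d;	/* hypothetical: CPUs 0, 2, 3, 5 online */

/* Analogue of cpumask_next(): first online CPU strictly after 'cpu',
 * or NR_CPUS if there is none. */
static int next_after(int cpu)
{
	for (int i = cpu + 1; i < NR_CPUS; ++i)
		if (online_mask & (1u << i))
			return i;
	return NR_CPUS;
}

/* Analogue of cpumask_first(). */
static int first_online(void)
{
	return next_after(-1);
}

/* Same shape as the patched helper: advance past *last_cpu, wrap to the
 * first online CPU when we run off the end, and remember where we stopped. */
static int pick_next_online(int *last_cpu)
{
	int cpu = next_after(*last_cpu);

	if (cpu >= NR_CPUS)
		cpu = first_online();
	*last_cpu = cpu;
	return cpu;
}

int main(void)
{
	int last_cpu = -1;

	for (int i = 0; i < 10; ++i)
		printf("%d ", pick_next_online(&last_cpu));	/* 0 2 3 5 0 2 3 5 0 2 */
	printf("\n");
	return 0;
}
```

Run as-is it prints `0 2 3 5 0 2 3 5 0 2`, a round-robin over the online set with wrap-around; and, exactly as the new comment says, two unsynchronized callers could read the same `last_cpu` and pick the same CPU, which is harmless here.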
```diff
@@ -161,7 +158,7 @@
 
 static inline int wg_queue_enqueue_per_device_and_peer(
 	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;
 
@@ -175,7 +172,7 @@
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
```