```diff
@@ -17,12 +17,13 @@
 struct wg_peer;
 struct multicore_worker;
 struct crypt_queue;
+struct prev_queue;
 struct sk_buff;
 
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-                         bool multicore, unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+                         unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
 
```
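The prototypes above drop the multicore flag from wg_packet_queue_init() and repurpose wg_packet_queue_free()'s flag as a purge request. A minimal sketch of what the matching queueing.c pair could look like under these signatures follows; it is not part of this hunk, and the crypt_queue members it touches (ring, worker, last_cpu) as well as the -1 initializer are inferred from their uses elsewhere in this diff rather than shown here.

```c
/* Hedged sketch only, not part of this patch. Assumes struct crypt_queue
 * carries a ptr_ring (ring), a per-CPU multicore_worker pointer (worker),
 * and the int last_cpu consumed by wg_cpumask_next_online() further down,
 * and that <linux/ptr_ring.h> and <linux/skb_array.h> are included.
 */
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
                         unsigned int len)
{
        int ret;

        memset(queue, 0, sizeof(*queue));
        queue->last_cpu = -1;
        ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
        if (ret)
                return ret;
        queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue);
        if (!queue->worker) {
                ptr_ring_cleanup(&queue->ring, NULL);
                return -ENOMEM;
        }
        return 0;
}

void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
{
        free_percpu(queue->worker);
        /* A non-empty ring is only acceptable when the caller asked to purge. */
        WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
        ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL);
}
```

In this reading, the purge flag decides whether skbs still sitting in the ring are destroyed on teardown or whether a non-empty ring is treated as a bug.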
```diff
@@ -93,13 +94,13 @@
 	skb->dev = NULL;
 #ifdef CONFIG_NET_SCHED
 	skb->tc_index = 0;
-	skb_reset_tc(skb);
 #endif
+	skb_reset_redirect(skb);
 	skb->hdr_len = skb_headroom(skb);
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 	skb_reset_inner_headers(skb);
 }
 
```
```diff
@@ -118,26 +119,46 @@
 	return cpu;
 }
 
-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	*last_cpu = cpu;
 	return cpu;
 }
 
+void wg_prev_queue_init(struct prev_queue *queue);
+
+/* Multi producer */
+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
+
+/* Single consumer */
+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
+
+/* Single consumer */
+static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
+{
+	if (queue->peeked)
+		return queue->peeked;
+	queue->peeked = wg_prev_queue_dequeue(queue);
+	return queue->peeked;
+}
+
+/* Single consumer */
+static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+{
+	queue->peeked = NULL;
+}
+
 static inline int wg_queue_enqueue_per_device_and_peer(
-	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;
 
```
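The declarations above describe the new per-peer prev_queue: multiple producers may enqueue, only one consumer may dequeue, and the one-slot peeked cache lets that consumer look at the head packet without committing to removing it. A hedged sketch of how the single-consumer side might be driven is below; the worker itself, example_xmit(), and the PACKET_STATE_* names are assumptions based on the CRYPTED/DEAD states mentioned in the enqueue helper that follows.

```c
/* Hypothetical single-consumer drain, for illustration only. The peek/drop
 * split lets the consumer look at the head packet and, if crypto has not
 * finished with it yet, leave it in place and come back later without
 * losing per-peer ordering. example_xmit() and the PACKET_STATE_* names
 * are assumptions, not part of this patch.
 */
static void example_xmit(struct sk_buff *skb);  /* hypothetical hand-off */

static void example_drain_peer_queue(struct prev_queue *queue)
{
        struct sk_buff *skb;

        while ((skb = wg_prev_queue_peek(queue)) != NULL) {
                enum packet_state state =
                        atomic_read_acquire(&PACKET_CB(skb)->state);

                if (state == PACKET_STATE_UNCRYPTED)
                        break;  /* Head not processed yet; keep it peeked. */
                wg_prev_queue_drop_peeked(queue);
                if (likely(state == PACKET_STATE_CRYPTED))
                        example_xmit(skb);
                else
                        kfree_skb(skb);  /* PACKET_STATE_DEAD */
        }
}
```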
```diff
@@ -145,21 +166,20 @@
 	/* We first queue this up for the peer ingestion, but the consumer
 	 * will wait for the state to change to CRYPTED or DEAD before.
 	 */
-	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+	if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
 		return -ENOSPC;
+
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
 	return 0;
 }
 
-static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
-					     struct sk_buff *skb,
-					     enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
```
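The two error codes above mean different things to a caller: -ENOSPC means the per-peer queue refused the packet, so the caller still owns the skb, while -EPIPE means the packet already sits in the peer queue but never reached the device ring, so no crypto worker will ever touch it and it must be marked dead for the per-peer consumer to reap. A hedged sketch of a caller honoring that contract is below; wg->encrypt_queue, peer->tx_queue, wg->packet_crypt_wq and PACKET_STATE_DEAD are assumed names used for illustration only.

```c
/* Hypothetical caller, for illustration only; not part of this patch. */
static int example_queue_for_encryption(struct wg_device *wg,
                                        struct wg_peer *peer,
                                        struct sk_buff *skb)
{
        int ret;

        ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
                                                   &peer->tx_queue, skb,
                                                   wg->packet_crypt_wq);
        if (unlikely(ret == -EPIPE))
                /* Already in the peer queue; let its consumer reap it. */
                wg_queue_enqueue_per_peer_tx(skb, PACKET_STATE_DEAD);
        else if (unlikely(ret == -ENOSPC))
                /* Never queued anywhere; the caller still owns the skb. */
                kfree_skb(skb);
        return ret;
}
```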
```diff
@@ -167,14 +187,12 @@
 	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
 
 	atomic_set_release(&PACKET_CB(skb)->state, state);
-	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
-					       peer->internal_id),
-		      peer->device->packet_crypt_wq, &queue->work);
+	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
+		      peer->device->packet_crypt_wq, &peer->transmit_packet_work);
 	wg_peer_put(peer);
 }
 
-static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
-						  enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
```
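The section cuts off inside the rx variant, but its shape mirrors the tx one: publish the state with release semantics while holding a temporary peer reference, then wake the single consumer. For rx that consumer is presumably the peer's NAPI context rather than a workqueue, as the old _napi name suggests; a hedged guess at the remainder, with peer->napi assumed, looks like this.

```c
/* Hedged sketch of how the rx variant presumably finishes; this part is not
 * shown in the section above, and peer->napi is an assumption based on the
 * old wg_queue_enqueue_per_peer_napi name.
 */
static inline void example_queue_enqueue_per_peer_rx(struct sk_buff *skb,
                                                     enum packet_state state)
{
        struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));

        atomic_set_release(&PACKET_CB(skb)->state, state);
        napi_schedule(&peer->napi);  /* Wake the single consumer (NAPI poll). */
        wg_peer_put(peer);
}
```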
|---|