2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
--- a/kernel/net/ipv4/esp4.c
+++ b/kernel/net/ipv4/esp4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) "IPsec: " fmt
 
 #include <crypto/aead.h>
@@ -17,6 +18,8 @@
 #include <net/icmp.h>
 #include <net/protocol.h>
 #include <net/udp.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
 
 #include <linux/highmem.h>
 
@@ -31,8 +34,6 @@
 };
 
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
-
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
 
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
@@ -118,6 +119,132 @@
 			put_page(sg_page(sg));
 }
 
+#ifdef CONFIG_INET_ESPINTCP
+struct esp_tcp_sk {
+	struct sock *sk;
+	struct rcu_head rcu;
+};
+
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+	struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+	sock_put(esk->sk);
+	kfree(esk);
+}
+
+static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct esp_tcp_sk *esk;
+	__be16 sport, dport;
+	struct sock *nsk;
+	struct sock *sk;
+
+	sk = rcu_dereference(x->encap_sk);
+	if (sk && sk->sk_state == TCP_ESTABLISHED)
+		return sk;
+
+	spin_lock_bh(&x->lock);
+	sport = encap->encap_sport;
+	dport = encap->encap_dport;
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (sk && sk == nsk) {
+		esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+		if (!esk) {
+			spin_unlock_bh(&x->lock);
+			return ERR_PTR(-ENOMEM);
+		}
+		RCU_INIT_POINTER(x->encap_sk, NULL);
+		esk->sk = sk;
+		call_rcu(&esk->rcu, esp_free_tcp_sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
+				     dport, x->props.saddr.a4, sport, 0);
+	if (!sk)
+		return ERR_PTR(-ENOENT);
+
+	if (!tcp_is_ulp_esp(sk)) {
+		sock_put(sk);
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock_bh(&x->lock);
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (encap->encap_sport != sport ||
+	    encap->encap_dport != dport) {
+		sock_put(sk);
+		sk = nsk ?: ERR_PTR(-EREMCHG);
+	} else if (sk == nsk) {
+		sock_put(sk);
+	} else {
+		rcu_assign_pointer(x->encap_sk, sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	return sk;
+}
+
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct sock *sk;
+	int err;
+
+	rcu_read_lock();
+
+	sk = esp_find_tcp_sk(x);
+	err = PTR_ERR_OR_ZERO(sk);
+	if (err)
+		goto out;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		err = espintcp_queue_out(sk, skb);
+	else
+		err = espintcp_push_skb(sk, skb);
+	bh_unlock_sock(sk);
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
+	return esp_output_tcp_finish(x, skb);
+}
+
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	local_bh_disable();
+	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+	local_bh_enable();
+
+	/* EINPROGRESS just happens to do the right thing. It
+	 * actually means that the skb has been consumed and
+	 * isn't coming back.
+	 */
+	return err ?: -EINPROGRESS;
+}
+#else
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+
+	return -EOPNOTSUPP;
+}
+#endif
+
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -125,10 +252,13 @@
 	void *tmp;
 	struct xfrm_state *x;
 
-	if (xo && (xo->flags & XFRM_DEV_RESUME))
-		x = skb->sp->xvec[skb->sp->len - 1];
-	else
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		struct sec_path *sp = skb_sec_path(skb);
+
+		x = sp->xvec[sp->len - 1];
+	} else {
 		x = skb_dst(skb)->xfrm;
+	}
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
@@ -145,7 +275,11 @@
 		secpath_reset(skb);
 		xfrm_dev_resume(skb);
 	} else {
-		xfrm_output_resume(skb, err);
+		if (!err &&
+		    x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+			esp_output_tail_tcp(x, skb);
+		else
+			xfrm_output_resume(skb, err);
 	}
 }
 
@@ -207,31 +341,79 @@
 	esp_output_done(base, err);
 }
 
-static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
+					       int encap_type,
+					       struct esp_info *esp,
+					       __be16 sport,
+					       __be16 dport)
 {
-	/* Fill padding... */
-	if (tfclen) {
-		memset(tail, 0, tfclen);
-		tail += tfclen;
-	}
-	do {
-		int i;
-		for (i = 0; i < plen - 2; i++)
-			tail[i] = i + 1;
-	} while (0);
-	tail[plen - 2] = plen - 2;
-	tail[plen - 1] = proto;
-}
-
-static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
-{
-	int encap_type;
 	struct udphdr *uh;
 	__be32 *udpdata32;
-	__be16 sport, dport;
-	struct xfrm_encap_tmpl *encap = x->encap;
-	struct ip_esp_hdr *esph = esp->esph;
 	unsigned int len;
+
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len + sizeof(struct iphdr) > IP_MAX_MTU)
+		return ERR_PTR(-EMSGSIZE);
+
+	uh = (struct udphdr *)esp->esph;
+	uh->source = sport;
+	uh->dest = dport;
+	uh->len = htons(len);
+	uh->check = 0;
+
+	*skb_mac_header(skb) = IPPROTO_UDP;
+
+	if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+		udpdata32 = (__be32 *)(uh + 1);
+		udpdata32[0] = udpdata32[1] = 0;
+		return (struct ip_esp_hdr *)(udpdata32 + 2);
+	}
+
+	return (struct ip_esp_hdr *)(uh + 1);
+}
+
+#ifdef CONFIG_INET_ESPINTCP
+static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
+					       struct sk_buff *skb,
+					       struct esp_info *esp)
+{
+	__be16 *lenp = (void *)esp->esph;
+	struct ip_esp_hdr *esph;
+	unsigned int len;
+	struct sock *sk;
+
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len > IP_MAX_MTU)
+		return ERR_PTR(-EMSGSIZE);
+
+	rcu_read_lock();
+	sk = esp_find_tcp_sk(x);
+	rcu_read_unlock();
+
+	if (IS_ERR(sk))
+		return ERR_CAST(sk);
+
+	*lenp = htons(len);
+	esph = (struct ip_esp_hdr *)(lenp + 1);
+
+	return esph;
+}
+#else
+static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
+					       struct sk_buff *skb,
+					       struct esp_info *esp)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
+static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
+			    struct esp_info *esp)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct ip_esp_hdr *esph;
+	__be16 sport, dport;
+	int encap_type;
 
 	spin_lock_bh(&x->lock);
 	sport = encap->encap_sport;
@@ -239,29 +421,20 @@
 	encap_type = encap->encap_type;
 	spin_unlock_bh(&x->lock);
 
-	len = skb->len + esp->tailen - skb_transport_offset(skb);
-	if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
-		return -EMSGSIZE;
-
-	uh = (struct udphdr *)esph;
-	uh->source = sport;
-	uh->dest = dport;
-	uh->len = htons(len);
-	uh->check = 0;
-
 	switch (encap_type) {
 	default:
 	case UDP_ENCAP_ESPINUDP:
-		esph = (struct ip_esp_hdr *)(uh + 1);
-		break;
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
-		udpdata32 = (__be32 *)(uh + 1);
-		udpdata32[0] = udpdata32[1] = 0;
-		esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+		esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
+		break;
+	case TCP_ENCAP_ESPINTCP:
+		esph = esp_output_tcp_encap(x, skb, esp);
 		break;
 	}
 
-	*skb_mac_header(skb) = IPPROTO_UDP;
+	if (IS_ERR(esph))
+		return PTR_ERR(esph);
+
 	esp->esph = esph;
 
 	return 0;
@@ -276,13 +449,17 @@
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
 
-	/* this is non-NULL only with UDP Encapsulation */
+	/* this is non-NULL only with TCP/UDP Encapsulation */
 	if (x->encap) {
-		int err = esp_output_udp_encap(x, skb, esp);
+		int err = esp_output_encap(x, skb, esp);
 
 		if (err < 0)
 			return err;
 	}
+
+	if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+	    ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+		goto cow;
 
 	if (!skb_cloned(skb)) {
 		if (tailen <= skb_tailroom(skb)) {
@@ -467,6 +644,9 @@
 	if (sg != dsg)
 		esp_ssg_unref(x, tmp);
 
+	if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+		err = esp_output_tail_tcp(x, skb);
+
 error_free:
 	kfree(tmp);
 error:
@@ -497,7 +677,7 @@
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
@@ -593,7 +773,23 @@
 
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
+		struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);
 		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
+		__be16 source;
+
+		switch (x->encap->encap_type) {
+		case TCP_ENCAP_ESPINTCP:
+			source = th->source;
+			break;
+		case UDP_ENCAP_ESPINUDP:
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			source = uh->source;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			err = -EINVAL;
+			goto out;
+		}
 
 		/*
 		 * 1) if the NAT-T peer's IP or port changed then
@@ -602,11 +798,11 @@
 		 *    SRC ports.
 		 */
 		if (iph->saddr != x->props.saddr.a4 ||
-		    uh->source != encap->encap_sport) {
+		    source != encap->encap_sport) {
 			xfrm_address_t ipaddr;
 
 			ipaddr.a4 = iph->saddr;
-			km_new_mapping(x, &ipaddr, uh->source);
+			km_new_mapping(x, &ipaddr, source);
 
 			/* XXX: perhaps add an extra
 			 * policy check here, to see
@@ -688,12 +884,11 @@
  */
 static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead = x->data;
 	struct aead_request *req;
 	struct sk_buff *trailer;
 	int ivlen = crypto_aead_ivsize(aead);
-	int elen = skb->len - sizeof(*esph) - ivlen;
+	int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
 	int nfrags;
 	int assoclen;
 	int seqhilen;
@@ -703,13 +898,13 @@
 	struct scatterlist *sg;
 	int err = -EINVAL;
 
-	if (!pskb_may_pull(skb, sizeof(*esph) + ivlen))
+	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen))
 		goto out;
 
 	if (elen <= 0)
 		goto out;
 
-	assoclen = sizeof(*esph);
+	assoclen = sizeof(struct ip_esp_hdr);
 	seqhilen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
@@ -780,28 +975,6 @@
 	return err;
 }
 
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
-{
-	struct crypto_aead *aead = x->data;
-	u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-	unsigned int net_adj;
-
-	switch (x->props.mode) {
-	case XFRM_MODE_TRANSPORT:
-	case XFRM_MODE_BEET:
-		net_adj = sizeof(struct iphdr);
-		break;
-	case XFRM_MODE_TUNNEL:
-		net_adj = 0;
-		break;
-	default:
-		BUG();
-	}
-
-	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
-		 net_adj) & ~(blksize - 1)) + net_adj - 2;
-}
-
 static int esp4_err(struct sk_buff *skb, u32 info)
 {
 	struct net *net = dev_net(skb->dev);
@@ -825,9 +998,9 @@
 		return 0;
 
 	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
-		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
+		ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ESP);
 	else
-		ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
+		ipv4_redirect(skb, net, 0, IPPROTO_ESP);
 	xfrm_state_put(x);
 
 	return 0;
@@ -961,7 +1134,7 @@
 	err = crypto_aead_setkey(aead, key, keylen);
 
 free_key:
-	kfree(key);
+	kfree_sensitive(key);
 
 error:
 	return err;
@@ -1004,6 +1177,14 @@
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
 		break;
+#ifdef CONFIG_INET_ESPINTCP
+	case TCP_ENCAP_ESPINTCP:
+		/* only the length field, TCP encap is done by
+		 * the socket
+		 */
+		x->props.header_len += 2;
+		break;
+#endif
 	}
 }
 
@@ -1027,7 +1208,6 @@
 	.flags		= XFRM_TYPE_REPLAY_PROT,
 	.init_state	= esp_init_state,
 	.destructor	= esp_destroy,
-	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
 	.output		= esp_output,
 };
@@ -1058,8 +1238,7 @@
 {
 	if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0)
 		pr_info("%s: can't remove protocol\n", __func__);
-	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
-		pr_info("%s: can't remove xfrm type\n", __func__);
+	xfrm_unregister_type(&esp_type, AF_INET);
 }
 
 module_init(esp4_init);