hc
2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/net/openvswitch/actions.c
....@@ -1,19 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (c) 2007-2017 Nicira, Inc.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of version 2 of the GNU General Public
6
- * License as published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful, but
9
- * WITHOUT ANY WARRANTY; without even the implied warranty of
10
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
- * General Public License for more details.
12
- *
13
- * You should have received a copy of the GNU General Public License
14
- * along with this program; if not, write to the Free Software
15
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16
- * 02110-1301, USA
174 */
185
196 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -22,7 +9,6 @@
229 #include <linux/in.h>
2310 #include <linux/ip.h>
2411 #include <linux/openvswitch.h>
25
-#include <linux/netfilter_ipv6.h>
2612 #include <linux/sctp.h>
2713 #include <linux/tcp.h>
2814 #include <linux/udp.h>
....@@ -169,49 +155,21 @@
169155 const struct nlattr *actions, int len,
170156 bool last, bool clone_flow_key);
171157
172
-static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
173
- __be16 ethertype)
174
-{
175
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
176
- __be16 diff[] = { ~(hdr->h_proto), ethertype };
177
-
178
- skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
179
- }
180
-
181
- hdr->h_proto = ethertype;
182
-}
158
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
159
+ struct sw_flow_key *key,
160
+ const struct nlattr *attr, int len);
183161
184162 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
185
- const struct ovs_action_push_mpls *mpls)
163
+ __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
186164 {
187
- struct mpls_shim_hdr *new_mpls_lse;
165
+ int err;
188166
189
- /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
190
- if (skb->encapsulation)
191
- return -ENOTSUPP;
167
+ err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
168
+ if (err)
169
+ return err;
192170
193
- if (skb_cow_head(skb, MPLS_HLEN) < 0)
194
- return -ENOMEM;
195
-
196
- if (!skb->inner_protocol) {
197
- skb_set_inner_network_header(skb, skb->mac_len);
198
- skb_set_inner_protocol(skb, skb->protocol);
199
- }
200
-
201
- skb_push(skb, MPLS_HLEN);
202
- memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
203
- skb->mac_len);
204
- skb_reset_mac_header(skb);
205
- skb_set_network_header(skb, skb->mac_len);
206
-
207
- new_mpls_lse = mpls_hdr(skb);
208
- new_mpls_lse->label_stack_entry = mpls->mpls_lse;
209
-
210
- skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
211
-
212
- if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
213
- update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
214
- skb->protocol = mpls->mpls_ethertype;
171
+ if (!mac_len)
172
+ key->mac_proto = MAC_PROTO_NONE;
215173
216174 invalidate_flow_key(key);
217175 return 0;
....@@ -222,30 +180,13 @@
222180 {
223181 int err;
224182
225
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
226
- if (unlikely(err))
183
+ err = skb_mpls_pop(skb, ethertype, skb->mac_len,
184
+ ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
185
+ if (err)
227186 return err;
228187
229
- skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
230
-
231
- memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
232
- skb->mac_len);
233
-
234
- __skb_pull(skb, MPLS_HLEN);
235
- skb_reset_mac_header(skb);
236
- skb_set_network_header(skb, skb->mac_len);
237
-
238
- if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
239
- struct ethhdr *hdr;
240
-
241
- /* mpls_hdr() is used to locate the ethertype field correctly in the
242
- * presence of VLAN tags.
243
- */
244
- hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
245
- update_ethertype(skb, hdr, ethertype);
246
- }
247
- if (eth_p_mpls(skb->protocol))
248
- skb->protocol = ethertype;
188
+ if (ethertype == htons(ETH_P_TEB))
189
+ key->mac_proto = MAC_PROTO_ETHERNET;
249190
250191 invalidate_flow_key(key);
251192 return 0;
....@@ -258,20 +199,16 @@
258199 __be32 lse;
259200 int err;
260201
261
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
262
- if (unlikely(err))
263
- return err;
202
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
203
+ return -ENOMEM;
264204
265205 stack = mpls_hdr(skb);
266206 lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
267
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
268
- __be32 diff[] = { ~(stack->label_stack_entry), lse };
207
+ err = skb_mpls_update_lse(skb, lse);
208
+ if (err)
209
+ return err;
269210
270
- skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
271
- }
272
-
273
- stack->label_stack_entry = lse;
274
- flow_key->mpls.top_lse = lse;
211
+ flow_key->mpls.lse[0] = lse;
275212 return 0;
276213 }
277214
....@@ -299,7 +236,7 @@
299236 key->eth.vlan.tpid = vlan->vlan_tpid;
300237 }
301238 return skb_vlan_push(skb, vlan->vlan_tpid,
302
- ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
239
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
303240 }
304241
305242 /* 'src' is already properly masked. */
....@@ -343,9 +280,11 @@
343280 */
344281 static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
345282 {
346
- skb_pull_rcsum(skb, ETH_HLEN);
347
- skb_reset_mac_header(skb);
348
- skb_reset_mac_len(skb);
283
+ int err;
284
+
285
+ err = skb_eth_pop(skb);
286
+ if (err)
287
+ return err;
349288
350289 /* safe right before invalidate_flow_key */
351290 key->mac_proto = MAC_PROTO_NONE;
....@@ -356,22 +295,12 @@
356295 static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
357296 const struct ovs_action_push_eth *ethh)
358297 {
359
- struct ethhdr *hdr;
298
+ int err;
360299
361
- /* Add the new Ethernet header */
362
- if (skb_cow_head(skb, ETH_HLEN) < 0)
363
- return -ENOMEM;
364
-
365
- skb_push(skb, ETH_HLEN);
366
- skb_reset_mac_header(skb);
367
- skb_reset_mac_len(skb);
368
-
369
- hdr = eth_hdr(skb);
370
- ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
371
- ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
372
- hdr->h_proto = skb->protocol;
373
-
374
- skb_postpush_rcsum(skb, hdr, ETH_HLEN);
300
+ err = skb_eth_push(skb, ethh->addresses.eth_dst,
301
+ ethh->addresses.eth_src);
302
+ if (err)
303
+ return err;
375304
376305 /* safe right before invalidate_flow_key */
377306 key->mac_proto = MAC_PROTO_ETHERNET;
....@@ -443,6 +372,7 @@
443372 update_ip_l4_checksum(skb, nh, *addr, new_addr);
444373 csum_replace4(&nh->check, *addr, new_addr);
445374 skb_clear_hash(skb);
375
+ ovs_ct_clear(skb, NULL);
446376 *addr = new_addr;
447377 }
448378
....@@ -490,6 +420,7 @@
490420 update_ipv6_checksum(skb, l4_proto, addr, new_addr);
491421
492422 skb_clear_hash(skb);
423
+ ovs_ct_clear(skb, NULL);
493424 memcpy(addr, new_addr, sizeof(__be32[4]));
494425 }
495426
....@@ -730,6 +661,7 @@
730661 static void set_tp_port(struct sk_buff *skb, __be16 *port,
731662 __be16 new_port, __sum16 *check)
732663 {
664
+ ovs_ct_clear(skb, NULL);
733665 inet_proto_csum_replace2(check, skb, *port, new_port, false);
734666 *port = new_port;
735667 }
....@@ -769,6 +701,7 @@
769701 uh->dest = dst;
770702 flow_key->tp.src = src;
771703 flow_key->tp.dst = dst;
704
+ ovs_ct_clear(skb, NULL);
772705 }
773706
774707 skb_clear_hash(skb);
....@@ -831,13 +764,16 @@
831764 sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
832765
833766 skb_clear_hash(skb);
767
+ ovs_ct_clear(skb, NULL);
768
+
834769 flow_key->tp.src = sh->source;
835770 flow_key->tp.dst = sh->dest;
836771
837772 return 0;
838773 }
839774
840
-static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
775
+static int ovs_vport_output(struct net *net, struct sock *sk,
776
+ struct sk_buff *skb)
841777 {
842778 struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
843779 struct vport *vport = data->vport;
....@@ -850,8 +786,10 @@
850786 __skb_dst_copy(skb, data->dst);
851787 *OVS_CB(skb) = data->cb;
852788 skb->inner_protocol = data->inner_protocol;
853
- skb->vlan_tci = data->vlan_tci;
854
- skb->vlan_proto = data->vlan_proto;
789
+ if (data->vlan_tci & VLAN_CFI_MASK)
790
+ __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
791
+ else
792
+ __vlan_hwaccel_clear_tag(skb);
855793
856794 /* Reconstruct the MAC header. */
857795 skb_push(skb, data->l2_len);
....@@ -895,7 +833,10 @@
895833 data->cb = *OVS_CB(skb);
896834 data->inner_protocol = skb->inner_protocol;
897835 data->network_offset = orig_network_offset;
898
- data->vlan_tci = skb->vlan_tci;
836
+ if (skb_vlan_tag_present(skb))
837
+ data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
838
+ else
839
+ data->vlan_tci = 0;
899840 data->vlan_proto = skb->vlan_proto;
900841 data->mac_proto = mac_proto;
901842 data->l2_len = hlen;
....@@ -938,12 +879,8 @@
938879 ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
939880 refdst_drop(orig_dst);
940881 } else if (key->eth.type == htons(ETH_P_IPV6)) {
941
- const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
942882 unsigned long orig_dst;
943883 struct rt6_info ovs_rt;
944
-
945
- if (!v6ops)
946
- goto err;
947884
948885 prepare_frag(vport, skb, orig_network_offset,
949886 ovs_key_mac_proto(key));
....@@ -956,7 +893,7 @@
956893 skb_dst_set_noref(skb, &ovs_rt.dst);
957894 IP6CB(skb)->frag_max_size = mru;
958895
959
- v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
896
+ ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
960897 refdst_drop(orig_dst);
961898 } else {
962899 WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
....@@ -1015,7 +952,7 @@
1015952 upcall.mru = OVS_CB(skb)->mru;
1016953
1017954 for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
1018
- a = nla_next(a, &rem)) {
955
+ a = nla_next(a, &rem)) {
1019956 switch (nla_type(a)) {
1020957 case OVS_USERSPACE_ATTR_USERDATA:
1021958 upcall.userdata = a;
....@@ -1052,6 +989,21 @@
1052989 }
1053990
1054991 return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
992
+}
993
+
994
+static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
995
+ struct sw_flow_key *key,
996
+ const struct nlattr *attr, bool last)
997
+{
998
+ /* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
999
+ struct nlattr *actions = nla_data(attr);
1000
+
1001
+ if (nla_len(actions))
1002
+ return clone_execute(dp, skb, key, 0, nla_data(actions),
1003
+ nla_len(actions), last, false);
1004
+
1005
+ consume_skb(skb);
1006
+ return 0;
10551007 }
10561008
10571009 /* When 'last' is true, sample() should always consume the 'skb'.
....@@ -1098,7 +1050,7 @@
10981050 int rem = nla_len(attr);
10991051 bool dont_clone_flow_key;
11001052
1101
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
1053
+ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
11021054 clone_arg = nla_data(attr);
11031055 dont_clone_flow_key = nla_get_u32(clone_arg);
11041056 actions = nla_next(clone_arg, &rem);
....@@ -1236,6 +1188,84 @@
12361188 return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
12371189 }
12381190
1191
+static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
1192
+ struct sw_flow_key *key,
1193
+ const struct nlattr *attr, bool last)
1194
+{
1195
+ struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
1196
+ const struct nlattr *actions, *cpl_arg;
1197
+ int len, max_len, rem = nla_len(attr);
1198
+ const struct check_pkt_len_arg *arg;
1199
+ bool clone_flow_key;
1200
+
1201
+ /* The first netlink attribute in 'attr' is always
1202
+ * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
1203
+ */
1204
+ cpl_arg = nla_data(attr);
1205
+ arg = nla_data(cpl_arg);
1206
+
1207
+ len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
1208
+ max_len = arg->pkt_len;
1209
+
1210
+ if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
1211
+ len <= max_len) {
1212
+ /* Second netlink attribute in 'attr' is always
1213
+ * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
1214
+ */
1215
+ actions = nla_next(cpl_arg, &rem);
1216
+ clone_flow_key = !arg->exec_for_lesser_equal;
1217
+ } else {
1218
+ /* Third netlink attribute in 'attr' is always
1219
+ * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
1220
+ */
1221
+ actions = nla_next(cpl_arg, &rem);
1222
+ actions = nla_next(actions, &rem);
1223
+ clone_flow_key = !arg->exec_for_greater;
1224
+ }
1225
+
1226
+ return clone_execute(dp, skb, key, 0, nla_data(actions),
1227
+ nla_len(actions), last, clone_flow_key);
1228
+}
1229
+
1230
+static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
1231
+{
1232
+ int err;
1233
+
1234
+ if (skb->protocol == htons(ETH_P_IPV6)) {
1235
+ struct ipv6hdr *nh;
1236
+
1237
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
1238
+ sizeof(*nh));
1239
+ if (unlikely(err))
1240
+ return err;
1241
+
1242
+ nh = ipv6_hdr(skb);
1243
+
1244
+ if (nh->hop_limit <= 1)
1245
+ return -EHOSTUNREACH;
1246
+
1247
+ key->ip.ttl = --nh->hop_limit;
1248
+ } else if (skb->protocol == htons(ETH_P_IP)) {
1249
+ struct iphdr *nh;
1250
+ u8 old_ttl;
1251
+
1252
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
1253
+ sizeof(*nh));
1254
+ if (unlikely(err))
1255
+ return err;
1256
+
1257
+ nh = ip_hdr(skb);
1258
+ if (nh->ttl <= 1)
1259
+ return -EHOSTUNREACH;
1260
+
1261
+ old_ttl = nh->ttl--;
1262
+ csum_replace2(&nh->check, htons(old_ttl << 8),
1263
+ htons(nh->ttl << 8));
1264
+ key->ip.ttl = nh->ttl;
1265
+ }
1266
+ return 0;
1267
+}
1268
+
12391269 /* Execute a list of actions against 'skb'. */
12401270 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
12411271 struct sw_flow_key *key,
....@@ -1289,10 +1319,24 @@
12891319 execute_hash(skb, key, a);
12901320 break;
12911321
1292
- case OVS_ACTION_ATTR_PUSH_MPLS:
1293
- err = push_mpls(skb, key, nla_data(a));
1294
- break;
1322
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
1323
+ struct ovs_action_push_mpls *mpls = nla_data(a);
12951324
1325
+ err = push_mpls(skb, key, mpls->mpls_lse,
1326
+ mpls->mpls_ethertype, skb->mac_len);
1327
+ break;
1328
+ }
1329
+ case OVS_ACTION_ATTR_ADD_MPLS: {
1330
+ struct ovs_action_add_mpls *mpls = nla_data(a);
1331
+ __u16 mac_len = 0;
1332
+
1333
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
1334
+ mac_len = skb->mac_len;
1335
+
1336
+ err = push_mpls(skb, key, mpls->mpls_lse,
1337
+ mpls->mpls_ethertype, mac_len);
1338
+ break;
1339
+ }
12961340 case OVS_ACTION_ATTR_POP_MPLS:
12971341 err = pop_mpls(skb, key, nla_get_be16(a));
12981342 break;
....@@ -1397,6 +1441,25 @@
13971441
13981442 break;
13991443 }
1444
+
1445
+ case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
1446
+ bool last = nla_is_last(a, rem);
1447
+
1448
+ err = execute_check_pkt_len(dp, skb, key, a, last);
1449
+ if (last)
1450
+ return err;
1451
+
1452
+ break;
1453
+ }
1454
+
1455
+ case OVS_ACTION_ATTR_DEC_TTL:
1456
+ err = execute_dec_ttl(skb, key);
1457
+ if (err == -EHOSTUNREACH) {
1458
+ err = dec_ttl_exception_handler(dp, skb, key,
1459
+ a, true);
1460
+ return err;
1461
+ }
1462
+ break;
14001463 }
14011464
14021465 if (unlikely(err)) {