2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/openvswitch/flow.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2007-2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
  */

 #include <linux/uaccess.h>
@@ -72,7 +59,7 @@
 void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
-	struct flow_stats *stats;
+	struct sw_flow_stats *stats;
 	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);

@@ -100,7 +87,7 @@
 			if (likely(flow->stats_last_writer != -1) &&
 			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
 				/* Try to allocate CPU-specific stats. */
-				struct flow_stats *new_stats;
+				struct sw_flow_stats *new_stats;

 				new_stats =
 					kmem_cache_alloc_node(flow_stats_cache,
....@@ -147,7 +134,7 @@
147134
148135 /* We open code this to make sure cpu 0 is always considered */
149136 for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
150
- struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
137
+ struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
151138
152139 if (stats) {
153140 /* Local CPU may write on non-local stats, so we must
@@ -171,7 +158,7 @@

 	/* We open code this to make sure cpu 0 is always considered */
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
-		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);

 		if (stats) {
 			spin_lock_bh(&stats->lock);
@@ -254,21 +241,18 @@

 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 {
+	unsigned short frag_off;
+	unsigned int payload_ofs = 0;
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int nh_len;
-	int payload_ofs;
 	struct ipv6hdr *nh;
-	uint8_t nexthdr;
-	__be16 frag_off;
-	int err;
+	int err, nexthdr, flags = 0;

 	err = check_header(skb, nh_ofs + sizeof(*nh));
 	if (unlikely(err))
 		return err;

 	nh = ipv6_hdr(skb);
-	nexthdr = nh->nexthdr;
-	payload_ofs = (u8 *)(nh + 1) - skb->data;

 	key->ip.proto = NEXTHDR_NONE;
 	key->ip.tos = ipv6_get_dsfield(nh);
@@ -277,22 +261,23 @@
 	key->ipv6.addr.src = nh->saddr;
 	key->ipv6.addr.dst = nh->daddr;

-	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
-
-	if (frag_off) {
-		if (frag_off & htons(~0x7))
+	nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
+	if (flags & IP6_FH_F_FRAG) {
+		if (frag_off) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
-		else
-			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+			key->ip.proto = NEXTHDR_FRAGMENT;
+			return 0;
+		}
+		key->ip.frag = OVS_FRAG_TYPE_FIRST;
 	} else {
 		key->ip.frag = OVS_FRAG_TYPE_NONE;
 	}

-	/* Delayed handling of error in ipv6_skip_exthdr() as it
-	 * always sets frag_off to a valid value which may be
+	/* Delayed handling of error in ipv6_find_hdr() as it
+	 * always sets flags and frag_off to a valid value which may be
 	 * used to set key->ip.frag above.
 	 */
-	if (unlikely(payload_ofs < 0))
+	if (unlikely(nexthdr < 0))
 		return -EPROTO;

 	nh_len = payload_ofs - nh_ofs;
@@ -329,7 +314,7 @@
 		return -ENOMEM;

 	vh = (struct vlan_head *)skb->data;
-	key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
+	key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
 	key_vh->tpid = vh->tpid;

 	if (unlikely(untag_vlan)) {
@@ -362,7 +347,7 @@
 	int res;

 	if (skb_vlan_tag_present(skb)) {
-		key->eth.vlan.tci = htons(skb->vlan_tci);
+		key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
 		key->eth.vlan.tpid = skb->vlan_proto;
 	} else {
 		/* Parse outer vlan tag in the non-accelerated case. */
@@ -538,78 +523,15 @@
 }

 /**
- * key_extract - extracts a flow key from an Ethernet frame.
+ * key_extract_l3l4 - extracts L3/L4 header information.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
- *       Ethernet header
+ *       L3 header
  * @key: output flow key
  *
- * The caller must ensure that skb->len >= ETH_HLEN.
- *
- * Returns 0 if successful, otherwise a negative errno value.
- *
- * Initializes @skb header fields as follows:
- *
- *    - skb->mac_header: the L2 header.
- *
- *    - skb->network_header: just past the L2 header, or just past the
- *      VLAN header, to the first byte of the L2 payload.
- *
- *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
- *      on output, then just past the IP header, if one is present and
- *      of a correct length, otherwise the same as skb->network_header.
- *      For other key->eth.type values it is left untouched.
- *
- *    - skb->protocol: the type of the data starting at skb->network_header.
- *      Equals to key->eth.type.
  */
-static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	int error;
-	struct ethhdr *eth;
-
-	/* Flags are always used as part of stats */
-	key->tp.flags = 0;
-
-	skb_reset_mac_header(skb);
-
-	/* Link layer. */
-	clear_vlan(key);
-	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
-		if (unlikely(eth_type_vlan(skb->protocol)))
-			return -EINVAL;
-
-		skb_reset_network_header(skb);
-		key->eth.type = skb->protocol;
-	} else {
-		eth = eth_hdr(skb);
-		ether_addr_copy(key->eth.src, eth->h_source);
-		ether_addr_copy(key->eth.dst, eth->h_dest);
-
-		__skb_pull(skb, 2 * ETH_ALEN);
-		/* We are going to push all headers that we pull, so no need to
-		 * update skb->csum here.
-		 */
-
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
-
-		key->eth.type = parse_ethertype(skb);
-		if (unlikely(key->eth.type == htons(0)))
-			return -ENOMEM;
-
-		/* Multiple tagged packets need to retain TPID to satisfy
-		 * skb_vlan_pop(), which will later shift the ethertype into
-		 * skb->protocol.
-		 */
-		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
-			skb->protocol = key->eth.cvlan.tpid;
-		else
-			skb->protocol = key->eth.type;
-
-		skb_reset_network_header(skb);
-		__skb_push(skb, skb->data - skb_mac_header(skb));
-	}
-	skb_reset_mac_len(skb);

 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
@@ -638,6 +560,7 @@
 		offset = nh->frag_off & htons(IP_OFFSET);
 		if (offset) {
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
+			memset(&key->tp, 0, sizeof(key->tp));
 			return 0;
 		}
 		if (nh->frag_off & htons(IP_MF) ||
@@ -714,27 +637,35 @@
 			memset(&key->ipv4, 0, sizeof(key->ipv4));
 		}
 	} else if (eth_p_mpls(key->eth.type)) {
-		size_t stack_len = MPLS_HLEN;
+		u8 label_count = 1;

+		memset(&key->mpls, 0, sizeof(key->mpls));
 		skb_set_inner_network_header(skb, skb->mac_len);
 		while (1) {
 			__be32 lse;

-			error = check_header(skb, skb->mac_len + stack_len);
+			error = check_header(skb, skb->mac_len +
+					     label_count * MPLS_HLEN);
 			if (unlikely(error))
 				return 0;

 			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);

-			if (stack_len == MPLS_HLEN)
-				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+			if (label_count <= MPLS_LABEL_DEPTH)
+				memcpy(&key->mpls.lse[label_count - 1], &lse,
+				       MPLS_HLEN);

-			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+			skb_set_inner_network_header(skb, skb->mac_len +
+						     label_count * MPLS_HLEN);
 			if (lse & htonl(MPLS_LS_S_MASK))
 				break;

-			stack_len += MPLS_HLEN;
+			label_count++;
 		}
+		if (label_count > MPLS_LABEL_DEPTH)
+			label_count = MPLS_LABEL_DEPTH;
+
+		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;    /* IPv6 Header + Extensions */

@@ -744,7 +675,7 @@
 		case -EINVAL:
 			memset(&key->ip, 0, sizeof(key->ip));
 			memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
-			/* fall-through */
+			fallthrough;
 		case -EPROTO:
 			skb->transport_header = skb->network_header;
 			error = 0;
@@ -755,8 +686,10 @@
 			return error;
 		}

-		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
+			memset(&key->tp, 0, sizeof(key->tp));
 			return 0;
+		}
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;

@@ -803,6 +736,92 @@
 	return 0;
 }

+/**
+ * key_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ *       Ethernet header
+ * @key: output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header fields as follows:
+ *
+ *    - skb->mac_header: the L2 header.
+ *
+ *    - skb->network_header: just past the L2 header, or just past the
+ *      VLAN header, to the first byte of the L2 payload.
+ *
+ *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->eth.type values it is left untouched.
+ *
+ *    - skb->protocol: the type of the data starting at skb->network_header.
+ *      Equals to key->eth.type.
+ */
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct ethhdr *eth;
+
+	/* Flags are always used as part of stats */
+	key->tp.flags = 0;
+
+	skb_reset_mac_header(skb);
+
+	/* Link layer. */
+	clear_vlan(key);
+	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+		if (unlikely(eth_type_vlan(skb->protocol)))
+			return -EINVAL;
+
+		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
+	} else {
+		eth = eth_hdr(skb);
+		ether_addr_copy(key->eth.src, eth->h_source);
+		ether_addr_copy(key->eth.dst, eth->h_dest);
+
+		__skb_pull(skb, 2 * ETH_ALEN);
+		/* We are going to push all headers that we pull, so no need to
+		 * update skb->csum here.
+		 */
+
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
+
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
+			return -ENOMEM;
+
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
+
+		skb_reset_network_header(skb);
+		__skb_push(skb, skb->data - skb_mac_header(skb));
+	}
+
+	skb_reset_mac_len(skb);
+
+	/* Fill out L3/L4 key info, if any */
+	return key_extract_l3l4(skb, key);
+}
+
+/* In the case of conntrack fragment handling it expects L3 headers,
+ * add a helper.
+ */
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	return key_extract_l3l4(skb, key);
+}
+
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	int res;
@@ -831,6 +850,9 @@
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	struct tc_skb_ext *tc_ext;
+#endif
 	int res, err;

 	/* Extract metadata from packet. */
@@ -863,7 +885,18 @@
 	if (res < 0)
 		return res;
 	key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+		tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+		key->recirc_id = tc_ext ? tc_ext->chain : 0;
+		OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
+	} else {
+		key->recirc_id = 0;
+	}
+#else
 	key->recirc_id = 0;
+#endif

 	err = key_extract(skb, key);
 	if (!err)