2024-05-10 748e4f3d702def1a4bff191e0cf93b6a05340f01
kernel/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/export.h>
@@ -25,6 +26,12 @@
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#endif
+#include <linux/bpf-netns.h>
 
 static void dissector_set_key(struct flow_dissector *flow_dissector,
			       enum flow_dissector_key_id key_id)
@@ -62,27 +69,37 @@
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
-				void *data, int hlen)
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach_check(struct net *net,
+					 struct bpf_prog *prog)
 {
-	__be16 *u, _u;
+	enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
 
-	u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
-	if (u)
-		return *u;
+	if (net == &init_net) {
+		/* BPF flow dissector in the root namespace overrides
+		 * any per-net-namespace one. When attaching to root,
+		 * make sure we don't have any BPF program attached
+		 * to the non-root namespaces.
+		 */
+		struct net *ns;
+
+		for_each_net(ns) {
+			if (ns == &init_net)
+				continue;
+			if (rcu_access_pointer(ns->bpf.run_array[type]))
+				return -EEXIST;
+		}
+	} else {
+		/* Make sure root flow dissector is not attached
+		 * when attaching to the non-root namespace.
+		 */
+		if (rcu_access_pointer(init_net.bpf.run_array[type]))
+			return -EEXIST;
+	}
 
 	return 0;
 }
+#endif /* CONFIG_BPF_SYSCALL */
 
 /**
  * __skb_flow_get_ports - extract the upper layer ports and return them
@@ -118,6 +135,88 @@
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
+static bool icmp_has_id(u8 type)
+{
+	switch (type) {
+	case ICMP_ECHO:
+	case ICMP_ECHOREPLY:
+	case ICMP_TIMESTAMP:
+	case ICMP_TIMESTAMPREPLY:
+	case ICMPV6_ECHO_REQUEST:
+	case ICMPV6_ECHO_REPLY:
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @thoff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+			   struct flow_dissector_key_icmp *key_icmp,
+			   void *data, int thoff, int hlen)
+{
+	struct icmphdr *ih, _ih;
+
+	ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+	if (!ih)
+		return;
+
+	key_icmp->type = ih->type;
+	key_icmp->code = ih->code;
+
+	/* As we use 0 to signal that the Id field is not present,
+	 * avoid confusion with packets without such field
+	 */
+	if (icmp_has_id(ih->type))
+		key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1;
+	else
+		key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+				    struct flow_dissector *flow_dissector,
+				    void *target_container,
+				    void *data, int thoff, int hlen)
+{
+	struct flow_dissector_key_icmp *key_icmp;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+		return;
+
+	key_icmp = skb_flow_dissector_target(flow_dissector,
+					     FLOW_DISSECTOR_KEY_ICMP,
+					     target_container);
+
+	skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
+void skb_flow_dissect_meta(const struct sk_buff *skb,
+			   struct flow_dissector *flow_dissector,
+			   void *target_container)
+{
+	struct flow_dissector_key_meta *meta;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META))
+		return;
+
+	meta = skb_flow_dissector_target(flow_dissector,
+					 FLOW_DISSECTOR_KEY_META,
+					 target_container);
+	meta->ingress_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(skb_flow_dissect_meta);
+
 static void
 skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
				    struct flow_dissector *flow_dissector,
@@ -133,6 +232,46 @@
					 target_container);
	ctrl->addr_type = type;
 }
+
+void
+skb_flow_dissect_ct(const struct sk_buff *skb,
+		    struct flow_dissector *flow_dissector,
+		    void *target_container,
+		    u16 *ctinfo_map,
+		    size_t mapsize)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct flow_dissector_key_ct *key;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_labels *cl;
+	struct nf_conn *ct;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT))
+		return;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return;
+
+	key = skb_flow_dissector_target(flow_dissector,
+					FLOW_DISSECTOR_KEY_CT,
+					target_container);
+
+	if (ctinfo < mapsize)
+		key->ct_state = ctinfo_map[ctinfo];
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
+	key->ct_zone = ct->zone.id;
+#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	key->ct_mark = READ_ONCE(ct->mark);
+#endif
+
+	cl = nf_ct_labels_find(ct);
+	if (cl)
+		memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels));
+#endif /* CONFIG_NF_CONNTRACK */
+}
+EXPORT_SYMBOL(skb_flow_dissect_ct);
 
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
@@ -244,18 +383,38 @@
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 
+void skb_flow_dissect_hash(const struct sk_buff *skb,
+			   struct flow_dissector *flow_dissector,
+			   void *target_container)
+{
+	struct flow_dissector_key_hash *key;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH))
+		return;
+
+	key = skb_flow_dissector_target(flow_dissector,
+					FLOW_DISSECTOR_KEY_HASH,
+					target_container);
+
+	key->hash = skb_get_hash_raw(skb);
+}
+EXPORT_SYMBOL(skb_flow_dissect_hash);
+
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
			 struct flow_dissector *flow_dissector,
-			void *target_container, void *data, int nhoff, int hlen)
+			void *target_container, void *data, int nhoff, int hlen,
+			int lse_index, bool *entropy_label)
 {
-	struct flow_dissector_key_keyid *key_keyid;
-	struct mpls_label *hdr, _hdr[2];
-	u32 entry, label;
+	struct mpls_label *hdr, _hdr;
+	u32 entry, label, bos;
 
 	if (!dissector_uses_key(flow_dissector,
				 FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
	     !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
+		return FLOW_DISSECT_RET_OUT_GOOD;
+
+	if (lse_index >= FLOW_DIS_MPLS_MAX)
		return FLOW_DISSECT_RET_OUT_GOOD;
 
	hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
@@ -263,31 +422,40 @@
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;
 
-	entry = ntohl(hdr[0].entry);
+	entry = ntohl(hdr->entry);
	label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+	bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
 
	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
		struct flow_dissector_key_mpls *key_mpls;
+		struct flow_dissector_mpls_lse *lse;
 
		key_mpls = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_MPLS,
						      target_container);
-		key_mpls->mpls_label = label;
-		key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
-			>> MPLS_LS_TTL_SHIFT;
-		key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
-			>> MPLS_LS_TC_SHIFT;
-		key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
-			>> MPLS_LS_S_SHIFT;
+		lse = &key_mpls->ls[lse_index];
+
+		lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+		lse->mpls_bos = bos;
+		lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+		lse->mpls_label = label;
+		dissector_set_mpls_lse(key_mpls, lse_index);
	}
 
-	if (label == MPLS_LABEL_ENTROPY) {
+	if (*entropy_label &&
+	    dissector_uses_key(flow_dissector,
+			       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+		struct flow_dissector_key_keyid *key_keyid;
+
		key_keyid = skb_flow_dissector_target(flow_dissector,
						       FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
						       target_container);
-		key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+		key_keyid->keyid = cpu_to_be32(label);
	}
-	return FLOW_DISSECT_RET_OUT_GOOD;
+
+	*entropy_label = label == MPLS_LABEL_ENTROPY;
+
+	return bos ? FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN;
 }
 
 static enum flow_dissect_ret
@@ -382,8 +550,8 @@
		offset += sizeof(struct gre_base_hdr);
 
	if (hdr->flags & GRE_CSUM)
-		offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
-			  sizeof(((struct gre_full_hdr *) 0)->reserved1);
+		offset += sizeof_field(struct gre_full_hdr, csum) +
+			  sizeof_field(struct gre_full_hdr, reserved1);
 
	if (hdr->flags & GRE_KEY) {
		const __be32 *keyid;
@@ -405,11 +573,11 @@
			else
				key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
		}
-		offset += sizeof(((struct gre_full_hdr *) 0)->key);
+		offset += sizeof_field(struct gre_full_hdr, key);
	}
 
	if (hdr->flags & GRE_SEQ)
-		offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+		offset += sizeof_field(struct pptp_gre_header, seq);
 
	if (gre_ver == 0) {
		if (*p_proto == htons(ETH_P_TEB)) {
@@ -436,7 +604,7 @@
		u8 *ppp_hdr;
 
		if (hdr->flags & GRE_ACK)
-			offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+			offset += sizeof_field(struct pptp_gre_header, ack);
 
		ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
					       sizeof(_ppp_hdr),
@@ -543,6 +711,31 @@
 }
 
 static void
+__skb_flow_dissect_ports(const struct sk_buff *skb,
+			 struct flow_dissector *flow_dissector,
+			 void *target_container, void *data, int nhoff,
+			 u8 ip_proto, int hlen)
+{
+	enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
+	struct flow_dissector_key_ports *key_ports;
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+		dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
+	else if (dissector_uses_key(flow_dissector,
+				    FLOW_DISSECTOR_KEY_PORTS_RANGE))
+		dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
+
+	if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
+		return;
+
+	key_ports = skb_flow_dissector_target(flow_dissector,
+					      dissector_ports,
+					      target_container);
+	key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						data, hlen);
+}
+
+static void
 __skb_flow_dissect_ipv4(const struct sk_buff *skb,
			 struct flow_dissector *flow_dissector,
			 void *target_container, void *data, const struct iphdr *iph)
@@ -588,8 +781,110 @@
	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
 }
 
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+				     struct flow_dissector *flow_dissector,
+				     void *target_container)
+{
+	struct flow_dissector_key_ports *key_ports = NULL;
+	struct flow_dissector_key_control *key_control;
+	struct flow_dissector_key_basic *key_basic;
+	struct flow_dissector_key_addrs *key_addrs;
+	struct flow_dissector_key_tags *key_tags;
+
+	key_control = skb_flow_dissector_target(flow_dissector,
+						FLOW_DISSECTOR_KEY_CONTROL,
+						target_container);
+	key_control->thoff = flow_keys->thoff;
+	if (flow_keys->is_frag)
+		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+	if (flow_keys->is_first_frag)
+		key_control->flags |= FLOW_DIS_FIRST_FRAG;
+	if (flow_keys->is_encap)
+		key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+	key_basic->n_proto = flow_keys->n_proto;
+	key_basic->ip_proto = flow_keys->ip_proto;
+
+	if (flow_keys->addr_proto == ETH_P_IP &&
+	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target_container);
+		key_addrs->v4addrs.src = flow_keys->ipv4_src;
+		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+		   dissector_uses_key(flow_dissector,
+				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						      target_container);
+		memcpy(&key_addrs->v6addrs.src, &flow_keys->ipv6_src,
+		       sizeof(key_addrs->v6addrs.src));
+		memcpy(&key_addrs->v6addrs.dst, &flow_keys->ipv6_dst,
+		       sizeof(key_addrs->v6addrs.dst));
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+	else if (dissector_uses_key(flow_dissector,
+				    FLOW_DISSECTOR_KEY_PORTS_RANGE))
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS_RANGE,
+						      target_container);
+
+	if (key_ports) {
+		key_ports->src = flow_keys->sport;
+		key_ports->dst = flow_keys->dport;
+	}
+
+	if (dissector_uses_key(flow_dissector,
+			       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+		key_tags = skb_flow_dissector_target(flow_dissector,
+						     FLOW_DISSECTOR_KEY_FLOW_LABEL,
+						     target_container);
+		key_tags->flow_label = ntohl(flow_keys->flow_label);
+	}
+}
+
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+		      __be16 proto, int nhoff, int hlen, unsigned int flags)
+{
+	struct bpf_flow_keys *flow_keys = ctx->flow_keys;
+	u32 result;
+
+	/* Pass parameters to the BPF program */
+	memset(flow_keys, 0, sizeof(*flow_keys));
+	flow_keys->n_proto = proto;
+	flow_keys->nhoff = nhoff;
+	flow_keys->thoff = flow_keys->nhoff;
+
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
+		     (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+	flow_keys->flags = flags;
+
+	result = bpf_prog_run_pin_on_cpu(prog, ctx);
+
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
+	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+				   flow_keys->nhoff, hlen);
+
+	return result == BPF_OK;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
+ * @net: associated network namespace, derived from @skb if NULL
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
  * @flow_dissector: list of keys to dissect
  * @target_container: target structure to put dissected values into
@@ -597,6 +892,8 @@
  * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
  * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
  * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
+ * @flags: flags that control the dissection process, e.g.
+ *         FLOW_DISSECTOR_F_STOP_AT_ENCAP.
  *
  * The function will try to retrieve individual keys into target specified
  * by flow_dissector from either the skbuff or a raw buffer specified by the
@@ -604,7 +901,8 @@
  *
  * Caller must take care of zeroing target container memory.
  */
-bool __skb_flow_dissect(const struct sk_buff *skb,
+bool __skb_flow_dissect(const struct net *net,
+			const struct sk_buff *skb,
			 struct flow_dissector *flow_dissector,
			 void *target_container,
			 void *data, __be16 proto, int nhoff, int hlen,
@@ -613,12 +911,12 @@
	struct flow_dissector_key_control *key_control;
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
-	struct flow_dissector_key_ports *key_ports;
-	struct flow_dissector_key_icmp *key_icmp;
	struct flow_dissector_key_tags *key_tags;
	struct flow_dissector_key_vlan *key_vlan;
	enum flow_dissect_ret fdret;
	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	bool mpls_el = false;
+	int mpls_lse = 0;
	int num_hdrs = 0;
	u8 ip_proto = 0;
	bool ret;
@@ -636,8 +934,14 @@
		int offset = 0;
 
		ops = skb->dev->dsa_ptr->tag_ops;
-		if (ops->flow_dissect &&
-		    !ops->flow_dissect(skb, &proto, &offset)) {
+		/* Tail taggers don't break flow dissection */
+		if (!ops->tail_tag) {
+			if (ops->flow_dissect)
+				ops->flow_dissect(skb, &proto, &offset);
+			else
+				dsa_tag_generic_flow_dissect(skb,
							     &proto,
							     &offset);
			hlen -= offset;
			nhoff += offset;
		}
@@ -658,6 +962,55 @@
	key_basic = skb_flow_dissector_target(flow_dissector,
					       FLOW_DISSECTOR_KEY_BASIC,
					       target_container);
+
+	if (skb) {
+		if (!net) {
+			if (skb->dev)
+				net = dev_net(skb->dev);
+			else if (skb->sk)
+				net = sock_net(skb->sk);
+		}
+	}
+
+	WARN_ON_ONCE(!net);
+	if (net) {
+		enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+		struct bpf_prog_array *run_array;
+
+		rcu_read_lock();
+		run_array = rcu_dereference(init_net.bpf.run_array[type]);
+		if (!run_array)
+			run_array = rcu_dereference(net->bpf.run_array[type]);
+
+		if (run_array) {
+			struct bpf_flow_keys flow_keys;
+			struct bpf_flow_dissector ctx = {
+				.flow_keys = &flow_keys,
+				.data = data,
+				.data_end = data + hlen,
+			};
+			__be16 n_proto = proto;
+			struct bpf_prog *prog;
+
+			if (skb) {
+				ctx.skb = skb;
+				/* we can't use 'proto' in the skb case
+				 * because it might be set to skb->vlan_proto
+				 * which has been pulled from the data
+				 */
+				n_proto = skb->protocol;
+			}
+
+			prog = READ_ONCE(run_array->items[0].prog);
+			ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
					       hlen, flags);
+			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
						 target_container);
+			rcu_read_unlock();
+			return ret;
+		}
+		rcu_read_unlock();
+	}
 
	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
@@ -701,6 +1054,9 @@
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}
 
+		__skb_flow_dissect_ipv4(skb, flow_dissector,
+					target_container, data, iph);
+
		if (ip_is_fragment(iph)) {
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
@@ -715,14 +1071,6 @@
					break;
				}
			}
-		}
-
-		__skb_flow_dissect_ipv4(skb, flow_dissector,
-					target_container, data, iph);
-
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
-			fdret = FLOW_DISSECT_RET_OUT_GOOD;
-			break;
		}
 
		break;
@@ -775,9 +1123,6 @@
		__skb_flow_dissect_ipv6(skb, flow_dissector,
					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			fdret = FLOW_DISSECT_RET_OUT_GOOD;
-
		break;
	}
	case htons(ETH_P_8021AD):
@@ -817,8 +1162,7 @@
 
			if (!vlan) {
				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
-				key_vlan->vlan_priority =
-					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+				key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
			} else {
				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
					VLAN_VID_MASK;
@@ -827,6 +1171,7 @@
					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
			}
			key_vlan->vlan_tpid = saved_vlan_tpid;
+			key_vlan->vlan_eth_type = proto;
		}
 
		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -886,7 +1231,10 @@
	case htons(ETH_P_MPLS_MC):
		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
						target_container, data,
-						nhoff, hlen);
+						nhoff, hlen, mpls_lse,
+						&mpls_el);
+		nhoff += sizeof(struct mpls_label);
+		mpls_lse++;
		break;
	case htons(ETH_P_FCOE):
		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
@@ -1027,26 +1375,19 @@
				       data, nhoff, hlen);
		break;
 
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		__skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+					data, nhoff, hlen);
+		break;
+
	default:
		break;
	}
 
-	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
-	    !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
-		key_ports = skb_flow_dissector_target(flow_dissector,
-						      FLOW_DISSECTOR_KEY_PORTS,
-						      target_container);
-		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
-							data, hlen);
-	}
-
-	if (dissector_uses_key(flow_dissector,
-			       FLOW_DISSECTOR_KEY_ICMP)) {
-		key_icmp = skb_flow_dissector_target(flow_dissector,
-						     FLOW_DISSECTOR_KEY_ICMP,
-						     target_container);
-		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
-	}
+	if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT))
+		__skb_flow_dissect_ports(skb, flow_dissector, target_container,
+					 data, nhoff, ip_proto, hlen);
 
	/* Process result of IP proto processing */
	switch (fdret) {
@@ -1097,8 +1438,8 @@
 static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
 {
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
-	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
-		     sizeof(*flow) - sizeof(flow->addrs));
+
+	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
 
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
@@ -1144,19 +1485,21 @@
 }
 EXPORT_SYMBOL(flow_get_u32_dst);
 
+/* Sort the source and destination IP and the ports,
+ * to have consistent hash within the two directions
+ */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
 {
	int addr_diff, i;
 
	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
-		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
-			    (__force u32)keys->addrs.v4addrs.src;
-		if ((addr_diff < 0) ||
-		    (addr_diff == 0 &&
-		     ((__force u16)keys->ports.dst <
-		      (__force u16)keys->ports.src))) {
+		if ((__force u32)keys->addrs.v4addrs.dst <
+		    (__force u32)keys->addrs.v4addrs.src)
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+		if ((__force u16)keys->ports.dst <
+		    (__force u16)keys->ports.src) {
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
@@ -1164,13 +1507,13 @@
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
-		if ((addr_diff < 0) ||
-		    (addr_diff == 0 &&
-		     ((__force u16)keys->ports.dst <
-		      (__force u16)keys->ports.src))) {
+		if (addr_diff < 0) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
+		}
+		if ((__force u16)keys->ports.dst <
+		    (__force u16)keys->ports.src) {
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
@@ -1245,9 +1588,8 @@
	__flow_hash_secret_init();
 
	memset(&keys, 0, sizeof(keys));
-	__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
-			   NULL, 0, 0, 0,
-			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+	__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
+			   &keys, NULL, 0, 0, 0, 0);
 
	return __flow_hash_from_keys(&keys, &hashrnd);
 }
@@ -1348,7 +1690,8 @@
 {
	struct flow_keys_basic keys;
 
-	if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+					      NULL, 0, 0, 0, 0))
		return 0;
 
	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
@@ -1465,5 +1808,4 @@
				    ARRAY_SIZE(flow_keys_basic_dissector_keys));
	return 0;
 }
-
 core_initcall(init_default_flow_dissectors);