| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * net/psample/psample.c - Netlink channel for packet sampling |
|---|
| 3 | 4 | * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 6 | | - * it under the terms of the GNU General Public License version 2 as |
|---|
| 7 | | - * published by the Free Software Foundation. |
|---|
| 8 | 5 | */ |
|---|
| 9 | 6 | |
|---|
| 10 | 7 | #include <linux/types.h> |
|---|
| .. | .. |
|---|
| 17 | 14 | #include <net/genetlink.h> |
|---|
| 18 | 15 | #include <net/psample.h> |
|---|
| 19 | 16 | #include <linux/spinlock.h> |
|---|
| 17 | +#include <net/ip_tunnels.h> |
|---|
| 18 | +#include <net/dst_metadata.h> |
|---|
| 20 | 19 | |
|---|
| 21 | 20 | #define PSAMPLE_MAX_PACKET_SIZE 0xffff |
|---|
| 22 | 21 | |
|---|
| .. | .. |
|---|
| 76 | 75 | int idx = 0; |
|---|
| 77 | 76 | int err; |
|---|
| 78 | 77 | |
|---|
| 79 | | - spin_lock(&psample_groups_lock); |
|---|
| 78 | + spin_lock_bh(&psample_groups_lock); |
|---|
| 80 | 79 | list_for_each_entry(group, &psample_groups_list, list) { |
|---|
| 81 | 80 | if (!net_eq(group->net, sock_net(msg->sk))) |
|---|
| 82 | 81 | continue; |
|---|
| .. | .. |
|---|
| 92 | 91 | idx++; |
|---|
| 93 | 92 | } |
|---|
| 94 | 93 | |
|---|
| 95 | | - spin_unlock(&psample_groups_lock); |
|---|
| 94 | + spin_unlock_bh(&psample_groups_lock); |
|---|
| 96 | 95 | cb->args[0] = idx; |
|---|
| 97 | 96 | return msg->len; |
|---|
| 98 | 97 | } |
|---|
| 99 | 98 | |
|---|
| 100 | | -static const struct genl_ops psample_nl_ops[] = { |
|---|
| 99 | +static const struct genl_small_ops psample_nl_ops[] = { |
|---|
| 101 | 100 | { |
|---|
| 102 | 101 | .cmd = PSAMPLE_CMD_GET_GROUP, |
|---|
| 102 | + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
|---|
| 103 | 103 | .dumpit = psample_nl_cmd_get_group_dumpit, |
|---|
| 104 | 104 | /* can be retrieved by unprivileged users */ |
|---|
| 105 | 105 | } |
|---|
| .. | .. |
|---|
| 112 | 112 | .netnsok = true, |
|---|
| 113 | 113 | .module = THIS_MODULE, |
|---|
| 114 | 114 | .mcgrps = psample_nl_mcgrps, |
|---|
| 115 | | - .ops = psample_nl_ops, |
|---|
| 116 | | - .n_ops = ARRAY_SIZE(psample_nl_ops), |
|---|
| 115 | + .small_ops = psample_nl_ops, |
|---|
| 116 | + .n_small_ops = ARRAY_SIZE(psample_nl_ops), |
|---|
| 117 | 117 | .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), |
|---|
| 118 | 118 | }; |
|---|
| 119 | 119 | |
|---|
| .. | .. |
|---|
| 174 | 174 | { |
|---|
| 175 | 175 | struct psample_group *group; |
|---|
| 176 | 176 | |
|---|
| 177 | | - spin_lock(&psample_groups_lock); |
|---|
| 177 | + spin_lock_bh(&psample_groups_lock); |
|---|
| 178 | 178 | |
|---|
| 179 | 179 | group = psample_group_lookup(net, group_num); |
|---|
| 180 | 180 | if (!group) { |
|---|
| .. | .. |
|---|
| 185 | 185 | group->refcount++; |
|---|
| 186 | 186 | |
|---|
| 187 | 187 | out: |
|---|
| 188 | | - spin_unlock(&psample_groups_lock); |
|---|
| 188 | + spin_unlock_bh(&psample_groups_lock); |
|---|
| 189 | 189 | return group; |
|---|
| 190 | 190 | } |
|---|
| 191 | 191 | EXPORT_SYMBOL_GPL(psample_group_get); |
|---|
| 192 | 192 | |
|---|
| 193 | +void psample_group_take(struct psample_group *group) |
|---|
| 194 | +{ |
|---|
| 195 | + spin_lock_bh(&psample_groups_lock); |
|---|
| 196 | + group->refcount++; |
|---|
| 197 | + spin_unlock_bh(&psample_groups_lock); |
|---|
| 198 | +} |
|---|
| 199 | +EXPORT_SYMBOL_GPL(psample_group_take); |
|---|
| 200 | + |
|---|
| 193 | 201 | void psample_group_put(struct psample_group *group) |
|---|
| 194 | 202 | { |
|---|
| 195 | | - spin_lock(&psample_groups_lock); |
|---|
| 203 | + spin_lock_bh(&psample_groups_lock); |
|---|
| 196 | 204 | |
|---|
| 197 | 205 | if (--group->refcount == 0) |
|---|
| 198 | 206 | psample_group_destroy(group); |
|---|
| 199 | 207 | |
|---|
| 200 | | - spin_unlock(&psample_groups_lock); |
|---|
| 208 | + spin_unlock_bh(&psample_groups_lock); |
|---|
| 201 | 209 | } |
|---|
| 202 | 210 | EXPORT_SYMBOL_GPL(psample_group_put); |
|---|
| 211 | + |
|---|
| 212 | +#ifdef CONFIG_INET |
|---|
| 213 | +static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, |
|---|
| 214 | + struct ip_tunnel_info *tun_info) |
|---|
| 215 | +{ |
|---|
| 216 | + unsigned short tun_proto = ip_tunnel_info_af(tun_info); |
|---|
| 217 | + const void *tun_opts = ip_tunnel_info_opts(tun_info); |
|---|
| 218 | + const struct ip_tunnel_key *tun_key = &tun_info->key; |
|---|
| 219 | + int tun_opts_len = tun_info->options_len; |
|---|
| 220 | + |
|---|
| 221 | + if (tun_key->tun_flags & TUNNEL_KEY && |
|---|
| 222 | + nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, |
|---|
| 223 | + PSAMPLE_TUNNEL_KEY_ATTR_PAD)) |
|---|
| 224 | + return -EMSGSIZE; |
|---|
| 225 | + |
|---|
| 226 | + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE && |
|---|
| 227 | + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)) |
|---|
| 228 | + return -EMSGSIZE; |
|---|
| 229 | + |
|---|
| 230 | + switch (tun_proto) { |
|---|
| 231 | + case AF_INET: |
|---|
| 232 | + if (tun_key->u.ipv4.src && |
|---|
| 233 | + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, |
|---|
| 234 | + tun_key->u.ipv4.src)) |
|---|
| 235 | + return -EMSGSIZE; |
|---|
| 236 | + if (tun_key->u.ipv4.dst && |
|---|
| 237 | + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, |
|---|
| 238 | + tun_key->u.ipv4.dst)) |
|---|
| 239 | + return -EMSGSIZE; |
|---|
| 240 | + break; |
|---|
| 241 | + case AF_INET6: |
|---|
| 242 | + if (!ipv6_addr_any(&tun_key->u.ipv6.src) && |
|---|
| 243 | + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, |
|---|
| 244 | + &tun_key->u.ipv6.src)) |
|---|
| 245 | + return -EMSGSIZE; |
|---|
| 246 | + if (!ipv6_addr_any(&tun_key->u.ipv6.dst) && |
|---|
| 247 | + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, |
|---|
| 248 | + &tun_key->u.ipv6.dst)) |
|---|
| 249 | + return -EMSGSIZE; |
|---|
| 250 | + break; |
|---|
| 251 | + } |
|---|
| 252 | + if (tun_key->tos && |
|---|
| 253 | + nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos)) |
|---|
| 254 | + return -EMSGSIZE; |
|---|
| 255 | + if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) |
|---|
| 256 | + return -EMSGSIZE; |
|---|
| 257 | + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && |
|---|
| 258 | + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) |
|---|
| 259 | + return -EMSGSIZE; |
|---|
| 260 | + if ((tun_key->tun_flags & TUNNEL_CSUM) && |
|---|
| 261 | + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) |
|---|
| 262 | + return -EMSGSIZE; |
|---|
| 263 | + if (tun_key->tp_src && |
|---|
| 264 | + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src)) |
|---|
| 265 | + return -EMSGSIZE; |
|---|
| 266 | + if (tun_key->tp_dst && |
|---|
| 267 | + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) |
|---|
| 268 | + return -EMSGSIZE; |
|---|
| 269 | + if ((tun_key->tun_flags & TUNNEL_OAM) && |
|---|
| 270 | + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) |
|---|
| 271 | + return -EMSGSIZE; |
|---|
| 272 | + if (tun_opts_len) { |
|---|
| 273 | + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && |
|---|
| 274 | + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, |
|---|
| 275 | + tun_opts_len, tun_opts)) |
|---|
| 276 | + return -EMSGSIZE; |
|---|
| 277 | + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && |
|---|
| 278 | + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, |
|---|
| 279 | + tun_opts_len, tun_opts)) |
|---|
| 280 | + return -EMSGSIZE; |
|---|
| 281 | + } |
|---|
| 282 | + |
|---|
| 283 | + return 0; |
|---|
| 284 | +} |
|---|
| 285 | + |
|---|
| 286 | +static int psample_ip_tun_to_nlattr(struct sk_buff *skb, |
|---|
| 287 | + struct ip_tunnel_info *tun_info) |
|---|
| 288 | +{ |
|---|
| 289 | + struct nlattr *nla; |
|---|
| 290 | + int err; |
|---|
| 291 | + |
|---|
| 292 | + nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL); |
|---|
| 293 | + if (!nla) |
|---|
| 294 | + return -EMSGSIZE; |
|---|
| 295 | + |
|---|
| 296 | + err = __psample_ip_tun_to_nlattr(skb, tun_info); |
|---|
| 297 | + if (err) { |
|---|
| 298 | + nla_nest_cancel(skb, nla); |
|---|
| 299 | + return err; |
|---|
| 300 | + } |
|---|
| 301 | + |
|---|
| 302 | + nla_nest_end(skb, nla); |
|---|
| 303 | + |
|---|
| 304 | + return 0; |
|---|
| 305 | +} |
|---|
| 306 | + |
|---|
| 307 | +static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) |
|---|
| 308 | +{ |
|---|
| 309 | + unsigned short tun_proto = ip_tunnel_info_af(tun_info); |
|---|
| 310 | + const struct ip_tunnel_key *tun_key = &tun_info->key; |
|---|
| 311 | + int tun_opts_len = tun_info->options_len; |
|---|
| 312 | + int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ |
|---|
| 313 | + |
|---|
| 314 | + if (tun_key->tun_flags & TUNNEL_KEY) |
|---|
| 315 | + sum += nla_total_size_64bit(sizeof(u64)); |
|---|
| 316 | + |
|---|
| 317 | + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) |
|---|
| 318 | + sum += nla_total_size(0); |
|---|
| 319 | + |
|---|
| 320 | + switch (tun_proto) { |
|---|
| 321 | + case AF_INET: |
|---|
| 322 | + if (tun_key->u.ipv4.src) |
|---|
| 323 | + sum += nla_total_size(sizeof(u32)); |
|---|
| 324 | + if (tun_key->u.ipv4.dst) |
|---|
| 325 | + sum += nla_total_size(sizeof(u32)); |
|---|
| 326 | + break; |
|---|
| 327 | + case AF_INET6: |
|---|
| 328 | + if (!ipv6_addr_any(&tun_key->u.ipv6.src)) |
|---|
| 329 | + sum += nla_total_size(sizeof(struct in6_addr)); |
|---|
| 330 | + if (!ipv6_addr_any(&tun_key->u.ipv6.dst)) |
|---|
| 331 | + sum += nla_total_size(sizeof(struct in6_addr)); |
|---|
| 332 | + break; |
|---|
| 333 | + } |
|---|
| 334 | + if (tun_key->tos) |
|---|
| 335 | + sum += nla_total_size(sizeof(u8)); |
|---|
| 336 | + sum += nla_total_size(sizeof(u8)); /* TTL */ |
|---|
| 337 | + if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) |
|---|
| 338 | + sum += nla_total_size(0); |
|---|
| 339 | + if (tun_key->tun_flags & TUNNEL_CSUM) |
|---|
| 340 | + sum += nla_total_size(0); |
|---|
| 341 | + if (tun_key->tp_src) |
|---|
| 342 | + sum += nla_total_size(sizeof(u16)); |
|---|
| 343 | + if (tun_key->tp_dst) |
|---|
| 344 | + sum += nla_total_size(sizeof(u16)); |
|---|
| 345 | + if (tun_key->tun_flags & TUNNEL_OAM) |
|---|
| 346 | + sum += nla_total_size(0); |
|---|
| 347 | + if (tun_opts_len) { |
|---|
| 348 | + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) |
|---|
| 349 | + sum += nla_total_size(tun_opts_len); |
|---|
| 350 | + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) |
|---|
| 351 | + sum += nla_total_size(tun_opts_len); |
|---|
| 352 | + } |
|---|
| 353 | + |
|---|
| 354 | + return sum; |
|---|
| 355 | +} |
|---|
| 356 | +#endif |
|---|
| 203 | 357 | |
|---|
| 204 | 358 | void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, |
|---|
| 205 | 359 | u32 trunc_size, int in_ifindex, int out_ifindex, |
|---|
| 206 | 360 | u32 sample_rate) |
|---|
| 207 | 361 | { |
|---|
| 362 | +#ifdef CONFIG_INET |
|---|
| 363 | + struct ip_tunnel_info *tun_info; |
|---|
| 364 | +#endif |
|---|
| 208 | 365 | struct sk_buff *nl_skb; |
|---|
| 209 | 366 | int data_len; |
|---|
| 210 | 367 | int meta_len; |
|---|
| .. | .. |
|---|
| 217 | 374 | nla_total_size(sizeof(u32)) + /* orig_size */ |
|---|
| 218 | 375 | nla_total_size(sizeof(u32)) + /* group_num */ |
|---|
| 219 | 376 | nla_total_size(sizeof(u32)); /* seq */ |
|---|
| 377 | + |
|---|
| 378 | +#ifdef CONFIG_INET |
|---|
| 379 | + tun_info = skb_tunnel_info(skb); |
|---|
| 380 | + if (tun_info) |
|---|
| 381 | + meta_len += psample_tunnel_meta_len(tun_info); |
|---|
| 382 | +#endif |
|---|
| 220 | 383 | |
|---|
| 221 | 384 | data_len = min(skb->len, trunc_size); |
|---|
| 222 | 385 | if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) |
|---|
| .. | .. |
|---|
| 272 | 435 | goto error; |
|---|
| 273 | 436 | } |
|---|
| 274 | 437 | |
|---|
| 438 | +#ifdef CONFIG_INET |
|---|
| 439 | + if (tun_info) { |
|---|
| 440 | + ret = psample_ip_tun_to_nlattr(nl_skb, tun_info); |
|---|
| 441 | + if (unlikely(ret < 0)) |
|---|
| 442 | + goto error; |
|---|
| 443 | + } |
|---|
| 444 | +#endif |
|---|
| 445 | + |
|---|
| 275 | 446 | genlmsg_end(nl_skb, data); |
|---|
| 276 | 447 | genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, |
|---|
| 277 | 448 | PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); |
|---|