| .. | .. |
|---|
| 42 | 42 | #include <net/neighbour.h> |
|---|
| 43 | 43 | #include <net/route.h> |
|---|
| 44 | 44 | #include <net/netevent.h> |
|---|
| 45 | | -#include <net/addrconf.h> |
|---|
| 45 | +#include <net/ipv6_stubs.h> |
|---|
| 46 | 46 | #include <net/ip6_route.h> |
|---|
| 47 | 47 | #include <rdma/ib_addr.h> |
|---|
| 48 | +#include <rdma/ib_cache.h> |
|---|
| 49 | +#include <rdma/ib_sa.h> |
|---|
| 48 | 50 | #include <rdma/ib.h> |
|---|
| 49 | 51 | #include <rdma/rdma_netlink.h> |
|---|
| 50 | 52 | #include <net/netlink.h> |
|---|
| .. | .. |
|---|
| 61 | 63 | struct rdma_dev_addr *addr, void *context); |
|---|
| 62 | 64 | unsigned long timeout; |
|---|
| 63 | 65 | struct delayed_work work; |
|---|
| 66 | + bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */ |
|---|
| 64 | 67 | int status; |
|---|
| 65 | 68 | u32 seq; |
|---|
| 66 | 69 | }; |
|---|
| .. | .. |
|---|
| 73 | 76 | |
|---|
| 74 | 77 | static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { |
|---|
| 75 | 78 | [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, |
|---|
| 76 | | - .len = sizeof(struct rdma_nla_ls_gid)}, |
|---|
| 79 | + .len = sizeof(struct rdma_nla_ls_gid), |
|---|
| 80 | + .validation_type = NLA_VALIDATE_MIN, |
|---|
| 81 | + .min = sizeof(struct rdma_nla_ls_gid)}, |
|---|
| 77 | 82 | }; |
|---|
| 78 | 83 | |
|---|
| 79 | 84 | static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) |
|---|
| .. | .. |
|---|
| 84 | 89 | if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) |
|---|
| 85 | 90 | return false; |
|---|
| 86 | 91 | |
|---|
| 87 | | - ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), |
|---|
| 88 | | - nlmsg_len(nlh), ib_nl_addr_policy, NULL); |
|---|
| 92 | + ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), |
|---|
| 93 | + nlmsg_len(nlh), ib_nl_addr_policy, NULL); |
|---|
| 89 | 94 | if (ret) |
|---|
| 90 | 95 | return false; |
|---|
| 91 | 96 | |
|---|
| .. | .. |
|---|
| 180 | 185 | |
|---|
| 181 | 186 | /* Repair the nlmsg header length */ |
|---|
| 182 | 187 | nlmsg_end(skb, nlh); |
|---|
| 183 | | - rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); |
|---|
| 188 | + rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); |
|---|
| 184 | 189 | |
|---|
| 185 | 190 | /* Make the request retry, so when we get the response from userspace |
|---|
| 186 | 191 | * we will have something. |
|---|
| .. | .. |
|---|
| 219 | 224 | } |
|---|
| 220 | 225 | EXPORT_SYMBOL(rdma_addr_size_kss); |
|---|
| 221 | 226 | |
|---|
| 222 | | -void rdma_copy_addr(struct rdma_dev_addr *dev_addr, |
|---|
| 223 | | - const struct net_device *dev, |
|---|
| 224 | | - const unsigned char *dst_dev_addr) |
|---|
| 227 | +/** |
|---|
| 228 | + * rdma_copy_src_l2_addr - Copy netdevice source addresses |
|---|
| 229 | + * @dev_addr: Destination address pointer where to copy the addresses |
|---|
| 230 | + * @dev: Netdevice whose source addresses to copy |
|---|
| 231 | + * |
|---|
| 232 | + * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. |
|---|
| 233 | + * This includes unicast address, broadcast address, device type and |
|---|
| 234 | + * interface index. |
|---|
| 235 | + */ |
|---|
| 236 | +void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, |
|---|
| 237 | + const struct net_device *dev) |
|---|
| 225 | 238 | { |
|---|
| 226 | 239 | dev_addr->dev_type = dev->type; |
|---|
| 227 | 240 | memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); |
|---|
| 228 | 241 | memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); |
|---|
| 229 | | - if (dst_dev_addr) |
|---|
| 230 | | - memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); |
|---|
| 231 | 242 | dev_addr->bound_dev_if = dev->ifindex; |
|---|
| 232 | 243 | } |
|---|
| 233 | | -EXPORT_SYMBOL(rdma_copy_addr); |
|---|
| 244 | +EXPORT_SYMBOL(rdma_copy_src_l2_addr); |
|---|
| 245 | + |
|---|
| 246 | +static struct net_device * |
|---|
| 247 | +rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) |
|---|
| 248 | +{ |
|---|
| 249 | + struct net_device *dev = NULL; |
|---|
| 250 | + int ret = -EADDRNOTAVAIL; |
|---|
| 251 | + |
|---|
| 252 | + switch (src_in->sa_family) { |
|---|
| 253 | + case AF_INET: |
|---|
| 254 | + dev = __ip_dev_find(net, |
|---|
| 255 | + ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, |
|---|
| 256 | + false); |
|---|
| 257 | + if (dev) |
|---|
| 258 | + ret = 0; |
|---|
| 259 | + break; |
|---|
| 260 | +#if IS_ENABLED(CONFIG_IPV6) |
|---|
| 261 | + case AF_INET6: |
|---|
| 262 | + for_each_netdev_rcu(net, dev) { |
|---|
| 263 | + if (ipv6_chk_addr(net, |
|---|
| 264 | + &((const struct sockaddr_in6 *)src_in)->sin6_addr, |
|---|
| 265 | + dev, 1)) { |
|---|
| 266 | + ret = 0; |
|---|
| 267 | + break; |
|---|
| 268 | + } |
|---|
| 269 | + } |
|---|
| 270 | + break; |
|---|
| 271 | +#endif |
|---|
| 272 | + } |
|---|
| 273 | + return ret ? ERR_PTR(ret) : dev; |
|---|
| 274 | +} |
|---|
| 234 | 275 | |
|---|
| 235 | 276 | int rdma_translate_ip(const struct sockaddr *addr, |
|---|
| 236 | 277 | struct rdma_dev_addr *dev_addr) |
|---|
| .. | .. |
|---|
| 241 | 282 | dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); |
|---|
| 242 | 283 | if (!dev) |
|---|
| 243 | 284 | return -ENODEV; |
|---|
| 244 | | - rdma_copy_addr(dev_addr, dev, NULL); |
|---|
| 285 | + rdma_copy_src_l2_addr(dev_addr, dev); |
|---|
| 245 | 286 | dev_put(dev); |
|---|
| 246 | 287 | return 0; |
|---|
| 247 | 288 | } |
|---|
| 248 | 289 | |
|---|
| 249 | | - switch (addr->sa_family) { |
|---|
| 250 | | - case AF_INET: |
|---|
| 251 | | - dev = ip_dev_find(dev_addr->net, |
|---|
| 252 | | - ((const struct sockaddr_in *)addr)->sin_addr.s_addr); |
|---|
| 253 | | - |
|---|
| 254 | | - if (!dev) |
|---|
| 255 | | - return -EADDRNOTAVAIL; |
|---|
| 256 | | - |
|---|
| 257 | | - rdma_copy_addr(dev_addr, dev, NULL); |
|---|
| 258 | | - dev_put(dev); |
|---|
| 259 | | - break; |
|---|
| 260 | | -#if IS_ENABLED(CONFIG_IPV6) |
|---|
| 261 | | - case AF_INET6: |
|---|
| 262 | | - rcu_read_lock(); |
|---|
| 263 | | - for_each_netdev_rcu(dev_addr->net, dev) { |
|---|
| 264 | | - if (ipv6_chk_addr(dev_addr->net, |
|---|
| 265 | | - &((const struct sockaddr_in6 *)addr)->sin6_addr, |
|---|
| 266 | | - dev, 1)) { |
|---|
| 267 | | - rdma_copy_addr(dev_addr, dev, NULL); |
|---|
| 268 | | - break; |
|---|
| 269 | | - } |
|---|
| 270 | | - } |
|---|
| 271 | | - rcu_read_unlock(); |
|---|
| 272 | | - break; |
|---|
| 273 | | -#endif |
|---|
| 274 | | - } |
|---|
| 275 | | - return 0; |
|---|
| 290 | + rcu_read_lock(); |
|---|
| 291 | + dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); |
|---|
| 292 | + if (!IS_ERR(dev)) |
|---|
| 293 | + rdma_copy_src_l2_addr(dev_addr, dev); |
|---|
| 294 | + rcu_read_unlock(); |
|---|
| 295 | + return PTR_ERR_OR_ZERO(dev); |
|---|
| 276 | 296 | } |
|---|
| 277 | 297 | EXPORT_SYMBOL(rdma_translate_ip); |
|---|
| 278 | 298 | |
|---|
| .. | .. |
|---|
| 295 | 315 | spin_unlock_bh(&lock); |
|---|
| 296 | 316 | } |
|---|
| 297 | 317 | |
|---|
| 298 | | -static int ib_nl_fetch_ha(const struct dst_entry *dst, |
|---|
| 299 | | - struct rdma_dev_addr *dev_addr, |
|---|
| 318 | +static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, |
|---|
| 300 | 319 | const void *daddr, u32 seq, u16 family) |
|---|
| 301 | 320 | { |
|---|
| 302 | | - if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) |
|---|
| 321 | + if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) |
|---|
| 303 | 322 | return -EADDRNOTAVAIL; |
|---|
| 304 | 323 | |
|---|
| 305 | | - /* We fill in what we can, the response will fill the rest */ |
|---|
| 306 | | - rdma_copy_addr(dev_addr, dst->dev, NULL); |
|---|
| 307 | 324 | return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); |
|---|
| 308 | 325 | } |
|---|
| 309 | 326 | |
|---|
| .. | .. |
|---|
| 322 | 339 | neigh_event_send(n, NULL); |
|---|
| 323 | 340 | ret = -ENODATA; |
|---|
| 324 | 341 | } else { |
|---|
| 325 | | - rdma_copy_addr(dev_addr, dst->dev, n->ha); |
|---|
| 342 | + neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); |
|---|
| 326 | 343 | } |
|---|
| 327 | 344 | |
|---|
| 328 | 345 | neigh_release(n); |
|---|
| .. | .. |
|---|
| 356 | 373 | (const void *)&dst_in6->sin6_addr; |
|---|
| 357 | 374 | sa_family_t family = dst_in->sa_family; |
|---|
| 358 | 375 | |
|---|
| 359 | | - /* Gateway + ARPHRD_INFINIBAND -> IB router */ |
|---|
| 360 | | - if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) |
|---|
| 361 | | - return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); |
|---|
| 376 | + might_sleep(); |
|---|
| 377 | + |
|---|
| 378 | + /* If we have a gateway in IB mode then it must be an IB network */ |
|---|
| 379 | + if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) |
|---|
| 380 | + return ib_nl_fetch_ha(dev_addr, daddr, seq, family); |
|---|
| 362 | 381 | else |
|---|
| 363 | 382 | return dst_fetch_ha(dst, dev_addr, daddr); |
|---|
| 364 | 383 | } |
|---|
| 365 | 384 | |
|---|
| 366 | | -static int addr4_resolve(struct sockaddr_in *src_in, |
|---|
| 367 | | - const struct sockaddr_in *dst_in, |
|---|
| 385 | +static int addr4_resolve(struct sockaddr *src_sock, |
|---|
| 386 | + const struct sockaddr *dst_sock, |
|---|
| 368 | 387 | struct rdma_dev_addr *addr, |
|---|
| 369 | 388 | struct rtable **prt) |
|---|
| 370 | 389 | { |
|---|
| 390 | + struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; |
|---|
| 391 | + const struct sockaddr_in *dst_in = |
|---|
| 392 | + (const struct sockaddr_in *)dst_sock; |
|---|
| 393 | + |
|---|
| 371 | 394 | __be32 src_ip = src_in->sin_addr.s_addr; |
|---|
| 372 | 395 | __be32 dst_ip = dst_in->sin_addr.s_addr; |
|---|
| 373 | 396 | struct rtable *rt; |
|---|
| .. | .. |
|---|
| 383 | 406 | if (ret) |
|---|
| 384 | 407 | return ret; |
|---|
| 385 | 408 | |
|---|
| 386 | | - src_in->sin_family = AF_INET; |
|---|
| 387 | 409 | src_in->sin_addr.s_addr = fl4.saddr; |
|---|
| 388 | | - |
|---|
| 389 | | - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
|---|
| 390 | | - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network |
|---|
| 391 | | - * type accordingly. |
|---|
| 392 | | - */ |
|---|
| 393 | | - if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) |
|---|
| 394 | | - addr->network = RDMA_NETWORK_IPV4; |
|---|
| 395 | 410 | |
|---|
| 396 | 411 | addr->hoplimit = ip4_dst_hoplimit(&rt->dst); |
|---|
| 397 | 412 | |
|---|
| .. | .. |
|---|
| 400 | 415 | } |
|---|
| 401 | 416 | |
|---|
| 402 | 417 | #if IS_ENABLED(CONFIG_IPV6) |
|---|
| 403 | | -static int addr6_resolve(struct sockaddr_in6 *src_in, |
|---|
| 404 | | - const struct sockaddr_in6 *dst_in, |
|---|
| 418 | +static int addr6_resolve(struct sockaddr *src_sock, |
|---|
| 419 | + const struct sockaddr *dst_sock, |
|---|
| 405 | 420 | struct rdma_dev_addr *addr, |
|---|
| 406 | 421 | struct dst_entry **pdst) |
|---|
| 407 | 422 | { |
|---|
| 423 | + struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; |
|---|
| 424 | + const struct sockaddr_in6 *dst_in = |
|---|
| 425 | + (const struct sockaddr_in6 *)dst_sock; |
|---|
| 408 | 426 | struct flowi6 fl6; |
|---|
| 409 | 427 | struct dst_entry *dst; |
|---|
| 410 | | - struct rt6_info *rt; |
|---|
| 411 | 428 | |
|---|
| 412 | 429 | memset(&fl6, 0, sizeof fl6); |
|---|
| 413 | 430 | fl6.daddr = dst_in->sin6_addr; |
|---|
| .. | .. |
|---|
| 418 | 435 | if (IS_ERR(dst)) |
|---|
| 419 | 436 | return PTR_ERR(dst); |
|---|
| 420 | 437 | |
|---|
| 421 | | - rt = (struct rt6_info *)dst; |
|---|
| 422 | | - if (ipv6_addr_any(&src_in->sin6_addr)) { |
|---|
| 423 | | - src_in->sin6_family = AF_INET6; |
|---|
| 438 | + if (ipv6_addr_any(&src_in->sin6_addr)) |
|---|
| 424 | 439 | src_in->sin6_addr = fl6.saddr; |
|---|
| 425 | | - } |
|---|
| 426 | | - |
|---|
| 427 | | - /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're |
|---|
| 428 | | - * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network |
|---|
| 429 | | - * type accordingly. |
|---|
| 430 | | - */ |
|---|
| 431 | | - if (rt->rt6i_flags & RTF_GATEWAY && |
|---|
| 432 | | - ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) |
|---|
| 433 | | - addr->network = RDMA_NETWORK_IPV6; |
|---|
| 434 | 440 | |
|---|
| 435 | 441 | addr->hoplimit = ip6_dst_hoplimit(dst); |
|---|
| 436 | 442 | |
|---|
| .. | .. |
|---|
| 438 | 444 | return 0; |
|---|
| 439 | 445 | } |
|---|
| 440 | 446 | #else |
|---|
| 441 | | -static int addr6_resolve(struct sockaddr_in6 *src_in, |
|---|
| 442 | | - const struct sockaddr_in6 *dst_in, |
|---|
| 447 | +static int addr6_resolve(struct sockaddr *src_sock, |
|---|
| 448 | + const struct sockaddr *dst_sock, |
|---|
| 443 | 449 | struct rdma_dev_addr *addr, |
|---|
| 444 | 450 | struct dst_entry **pdst) |
|---|
| 445 | 451 | { |
|---|
| .. | .. |
|---|
| 450 | 456 | static int addr_resolve_neigh(const struct dst_entry *dst, |
|---|
| 451 | 457 | const struct sockaddr *dst_in, |
|---|
| 452 | 458 | struct rdma_dev_addr *addr, |
|---|
| 459 | + unsigned int ndev_flags, |
|---|
| 453 | 460 | u32 seq) |
|---|
| 454 | 461 | { |
|---|
| 455 | | - if (dst->dev->flags & IFF_LOOPBACK) { |
|---|
| 456 | | - int ret; |
|---|
| 462 | + int ret = 0; |
|---|
| 457 | 463 | |
|---|
| 458 | | - ret = rdma_translate_ip(dst_in, addr); |
|---|
| 459 | | - if (!ret) |
|---|
| 460 | | - memcpy(addr->dst_dev_addr, addr->src_dev_addr, |
|---|
| 461 | | - MAX_ADDR_LEN); |
|---|
| 464 | + if (ndev_flags & IFF_LOOPBACK) { |
|---|
| 465 | + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); |
|---|
| 466 | + } else { |
|---|
| 467 | + if (!(ndev_flags & IFF_NOARP)) { |
|---|
| 468 | + /* If the device doesn't do ARP internally */ |
|---|
| 469 | + ret = fetch_ha(dst, addr, dst_in, seq); |
|---|
| 470 | + } |
|---|
| 471 | + } |
|---|
| 472 | + return ret; |
|---|
| 473 | +} |
|---|
| 462 | 474 | |
|---|
| 463 | | - return ret; |
|---|
| 475 | +static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, |
|---|
| 476 | + const struct sockaddr *dst_in, |
|---|
| 477 | + const struct dst_entry *dst, |
|---|
| 478 | + const struct net_device *ndev) |
|---|
| 479 | +{ |
|---|
| 480 | + int ret = 0; |
|---|
| 481 | + |
|---|
| 482 | + if (dst->dev->flags & IFF_LOOPBACK) |
|---|
| 483 | + ret = rdma_translate_ip(dst_in, dev_addr); |
|---|
| 484 | + else |
|---|
| 485 | + rdma_copy_src_l2_addr(dev_addr, dst->dev); |
|---|
| 486 | + |
|---|
| 487 | + /* |
|---|
| 488 | + * If there's a gateway and type of device not ARPHRD_INFINIBAND, |
|---|
| 489 | + * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the |
|---|
| 490 | + * network type accordingly. |
|---|
| 491 | + */ |
|---|
| 492 | + if (has_gateway(dst, dst_in->sa_family) && |
|---|
| 493 | + ndev->type != ARPHRD_INFINIBAND) |
|---|
| 494 | + dev_addr->network = dst_in->sa_family == AF_INET ? |
|---|
| 495 | + RDMA_NETWORK_IPV4 : |
|---|
| 496 | + RDMA_NETWORK_IPV6; |
|---|
| 497 | + else |
|---|
| 498 | + dev_addr->network = RDMA_NETWORK_IB; |
|---|
| 499 | + |
|---|
| 500 | + return ret; |
|---|
| 501 | +} |
|---|
| 502 | + |
|---|
| 503 | +static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, |
|---|
| 504 | + unsigned int *ndev_flags, |
|---|
| 505 | + const struct sockaddr *dst_in, |
|---|
| 506 | + const struct dst_entry *dst) |
|---|
| 507 | +{ |
|---|
| 508 | + struct net_device *ndev = READ_ONCE(dst->dev); |
|---|
| 509 | + |
|---|
| 510 | + *ndev_flags = ndev->flags; |
|---|
| 511 | + /* A physical device must be the RDMA device to use */ |
|---|
| 512 | + if (ndev->flags & IFF_LOOPBACK) { |
|---|
| 513 | + /* |
|---|
| 514 | + * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or |
|---|
| 515 | + * loopback IP address. So if route is resolved to loopback |
|---|
| 516 | + * interface, translate that to a real ndev based on non |
|---|
| 517 | + * loopback IP address. |
|---|
| 518 | + */ |
|---|
| 519 | + ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); |
|---|
| 520 | + if (IS_ERR(ndev)) |
|---|
| 521 | + return -ENODEV; |
|---|
| 464 | 522 | } |
|---|
| 465 | 523 | |
|---|
| 466 | | - /* If the device doesn't do ARP internally */ |
|---|
| 467 | | - if (!(dst->dev->flags & IFF_NOARP)) |
|---|
| 468 | | - return fetch_ha(dst, addr, dst_in, seq); |
|---|
| 524 | + return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); |
|---|
| 525 | +} |
|---|
| 469 | 526 | |
|---|
| 470 | | - rdma_copy_addr(addr, dst->dev, NULL); |
|---|
| 527 | +static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) |
|---|
| 528 | +{ |
|---|
| 529 | + struct net_device *ndev; |
|---|
| 471 | 530 | |
|---|
| 531 | + ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); |
|---|
| 532 | + if (IS_ERR(ndev)) |
|---|
| 533 | + return PTR_ERR(ndev); |
|---|
| 534 | + |
|---|
| 535 | + /* |
|---|
| 536 | + * Since we are holding the rcu, reading net and ifindex |
|---|
| 537 | + * are safe without any additional reference; because |
|---|
| 538 | + * change_net_namespace() in net/core/dev.c does rcu sync |
|---|
| 539 | + * after it changes the state to IFF_DOWN and before |
|---|
| 540 | + * updating netdev fields {net, ifindex}. |
|---|
| 541 | + */ |
|---|
| 542 | + addr->net = dev_net(ndev); |
|---|
| 543 | + addr->bound_dev_if = ndev->ifindex; |
|---|
| 472 | 544 | return 0; |
|---|
| 545 | +} |
|---|
| 546 | + |
|---|
| 547 | +static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) |
|---|
| 548 | +{ |
|---|
| 549 | + addr->net = &init_net; |
|---|
| 550 | + addr->bound_dev_if = 0; |
|---|
| 473 | 551 | } |
|---|
| 474 | 552 | |
|---|
| 475 | 553 | static int addr_resolve(struct sockaddr *src_in, |
|---|
| 476 | 554 | const struct sockaddr *dst_in, |
|---|
| 477 | 555 | struct rdma_dev_addr *addr, |
|---|
| 478 | 556 | bool resolve_neigh, |
|---|
| 557 | + bool resolve_by_gid_attr, |
|---|
| 479 | 558 | u32 seq) |
|---|
| 480 | 559 | { |
|---|
| 481 | | - struct net_device *ndev; |
|---|
| 482 | | - struct dst_entry *dst; |
|---|
| 560 | + struct dst_entry *dst = NULL; |
|---|
| 561 | + unsigned int ndev_flags = 0; |
|---|
| 562 | + struct rtable *rt = NULL; |
|---|
| 483 | 563 | int ret; |
|---|
| 484 | 564 | |
|---|
| 485 | 565 | if (!addr->net) { |
|---|
| .. | .. |
|---|
| 487 | 567 | return -EINVAL; |
|---|
| 488 | 568 | } |
|---|
| 489 | 569 | |
|---|
| 570 | + rcu_read_lock(); |
|---|
| 571 | + if (resolve_by_gid_attr) { |
|---|
| 572 | + if (!addr->sgid_attr) { |
|---|
| 573 | + rcu_read_unlock(); |
|---|
| 574 | + pr_warn_ratelimited("%s: missing gid_attr\n", __func__); |
|---|
| 575 | + return -EINVAL; |
|---|
| 576 | + } |
|---|
| 577 | + /* |
|---|
| 578 | + * If the request is for a specific gid attribute of the |
|---|
| 579 | + * rdma_dev_addr, derive net from the netdevice of the |
|---|
| 580 | + * GID attribute. |
|---|
| 581 | + */ |
|---|
| 582 | + ret = set_addr_netns_by_gid_rcu(addr); |
|---|
| 583 | + if (ret) { |
|---|
| 584 | + rcu_read_unlock(); |
|---|
| 585 | + return ret; |
|---|
| 586 | + } |
|---|
| 587 | + } |
|---|
| 490 | 588 | if (src_in->sa_family == AF_INET) { |
|---|
| 491 | | - struct rtable *rt = NULL; |
|---|
| 492 | | - const struct sockaddr_in *dst_in4 = |
|---|
| 493 | | - (const struct sockaddr_in *)dst_in; |
|---|
| 494 | | - |
|---|
| 495 | | - ret = addr4_resolve((struct sockaddr_in *)src_in, |
|---|
| 496 | | - dst_in4, addr, &rt); |
|---|
| 497 | | - if (ret) |
|---|
| 498 | | - return ret; |
|---|
| 499 | | - |
|---|
| 500 | | - if (resolve_neigh) |
|---|
| 501 | | - ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); |
|---|
| 502 | | - |
|---|
| 503 | | - if (addr->bound_dev_if) { |
|---|
| 504 | | - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); |
|---|
| 505 | | - } else { |
|---|
| 506 | | - ndev = rt->dst.dev; |
|---|
| 507 | | - dev_hold(ndev); |
|---|
| 508 | | - } |
|---|
| 509 | | - |
|---|
| 510 | | - ip_rt_put(rt); |
|---|
| 589 | + ret = addr4_resolve(src_in, dst_in, addr, &rt); |
|---|
| 590 | + dst = &rt->dst; |
|---|
| 511 | 591 | } else { |
|---|
| 512 | | - const struct sockaddr_in6 *dst_in6 = |
|---|
| 513 | | - (const struct sockaddr_in6 *)dst_in; |
|---|
| 592 | + ret = addr6_resolve(src_in, dst_in, addr, &dst); |
|---|
| 593 | + } |
|---|
| 594 | + if (ret) { |
|---|
| 595 | + rcu_read_unlock(); |
|---|
| 596 | + goto done; |
|---|
| 597 | + } |
|---|
| 598 | + ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); |
|---|
| 599 | + rcu_read_unlock(); |
|---|
| 514 | 600 | |
|---|
| 515 | | - ret = addr6_resolve((struct sockaddr_in6 *)src_in, |
|---|
| 516 | | - dst_in6, addr, |
|---|
| 517 | | - &dst); |
|---|
| 518 | | - if (ret) |
|---|
| 519 | | - return ret; |
|---|
| 601 | + /* |
|---|
| 602 | + * Resolve neighbor destination address if requested and |
|---|
| 603 | + * only if src addr translation didn't fail. |
|---|
| 604 | + */ |
|---|
| 605 | + if (!ret && resolve_neigh) |
|---|
| 606 | + ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); |
|---|
| 520 | 607 | |
|---|
| 521 | | - if (resolve_neigh) |
|---|
| 522 | | - ret = addr_resolve_neigh(dst, dst_in, addr, seq); |
|---|
| 523 | | - |
|---|
| 524 | | - if (addr->bound_dev_if) { |
|---|
| 525 | | - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); |
|---|
| 526 | | - } else { |
|---|
| 527 | | - ndev = dst->dev; |
|---|
| 528 | | - dev_hold(ndev); |
|---|
| 529 | | - } |
|---|
| 530 | | - |
|---|
| 608 | + if (src_in->sa_family == AF_INET) |
|---|
| 609 | + ip_rt_put(rt); |
|---|
| 610 | + else |
|---|
| 531 | 611 | dst_release(dst); |
|---|
| 532 | | - } |
|---|
| 533 | | - |
|---|
| 534 | | - if (ndev) { |
|---|
| 535 | | - if (ndev->flags & IFF_LOOPBACK) |
|---|
| 536 | | - ret = rdma_translate_ip(dst_in, addr); |
|---|
| 537 | | - else |
|---|
| 538 | | - addr->bound_dev_if = ndev->ifindex; |
|---|
| 539 | | - dev_put(ndev); |
|---|
| 540 | | - } |
|---|
| 541 | | - |
|---|
| 612 | +done: |
|---|
| 613 | + /* |
|---|
| 614 | + * Clear the addr net to go back to its original state, only if it was |
|---|
| 615 | + * derived from GID attribute in this context. |
|---|
| 616 | + */ |
|---|
| 617 | + if (resolve_by_gid_attr) |
|---|
| 618 | + rdma_addr_set_net_defaults(addr); |
|---|
| 542 | 619 | return ret; |
|---|
| 543 | 620 | } |
|---|
| 544 | 621 | |
|---|
| .. | .. |
|---|
| 553 | 630 | src_in = (struct sockaddr *)&req->src_addr; |
|---|
| 554 | 631 | dst_in = (struct sockaddr *)&req->dst_addr; |
|---|
| 555 | 632 | req->status = addr_resolve(src_in, dst_in, req->addr, |
|---|
| 556 | | - true, req->seq); |
|---|
| 633 | + true, req->resolve_by_gid_attr, |
|---|
| 634 | + req->seq); |
|---|
| 557 | 635 | if (req->status && time_after_eq(jiffies, req->timeout)) { |
|---|
| 558 | 636 | req->status = -ETIMEDOUT; |
|---|
| 559 | 637 | } else if (req->status == -ENODATA) { |
|---|
| .. | .. |
|---|
| 584 | 662 | } |
|---|
| 585 | 663 | |
|---|
| 586 | 664 | int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, |
|---|
| 587 | | - struct rdma_dev_addr *addr, int timeout_ms, |
|---|
| 665 | + struct rdma_dev_addr *addr, unsigned long timeout_ms, |
|---|
| 588 | 666 | void (*callback)(int status, struct sockaddr *src_addr, |
|---|
| 589 | 667 | struct rdma_dev_addr *addr, void *context), |
|---|
| 590 | | - void *context) |
|---|
| 668 | + bool resolve_by_gid_attr, void *context) |
|---|
| 591 | 669 | { |
|---|
| 592 | 670 | struct sockaddr *src_in, *dst_in; |
|---|
| 593 | 671 | struct addr_req *req; |
|---|
| .. | .. |
|---|
| 615 | 693 | req->addr = addr; |
|---|
| 616 | 694 | req->callback = callback; |
|---|
| 617 | 695 | req->context = context; |
|---|
| 696 | + req->resolve_by_gid_attr = resolve_by_gid_attr; |
|---|
| 618 | 697 | INIT_DELAYED_WORK(&req->work, process_one_req); |
|---|
| 619 | 698 | req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); |
|---|
| 620 | 699 | |
|---|
| 621 | | - req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); |
|---|
| 700 | + req->status = addr_resolve(src_in, dst_in, addr, true, |
|---|
| 701 | + req->resolve_by_gid_attr, req->seq); |
|---|
| 622 | 702 | switch (req->status) { |
|---|
| 623 | 703 | case 0: |
|---|
| 624 | 704 | req->timeout = jiffies; |
|---|
| .. | .. |
|---|
| 639 | 719 | } |
|---|
| 640 | 720 | EXPORT_SYMBOL(rdma_resolve_ip); |
|---|
| 641 | 721 | |
|---|
| 642 | | -int rdma_resolve_ip_route(struct sockaddr *src_addr, |
|---|
| 643 | | - const struct sockaddr *dst_addr, |
|---|
| 644 | | - struct rdma_dev_addr *addr) |
|---|
| 722 | +int roce_resolve_route_from_path(struct sa_path_rec *rec, |
|---|
| 723 | + const struct ib_gid_attr *attr) |
|---|
| 645 | 724 | { |
|---|
| 646 | | - struct sockaddr_storage ssrc_addr = {}; |
|---|
| 647 | | - struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; |
|---|
| 725 | + union { |
|---|
| 726 | + struct sockaddr _sockaddr; |
|---|
| 727 | + struct sockaddr_in _sockaddr_in; |
|---|
| 728 | + struct sockaddr_in6 _sockaddr_in6; |
|---|
| 729 | + } sgid, dgid; |
|---|
| 730 | + struct rdma_dev_addr dev_addr = {}; |
|---|
| 731 | + int ret; |
|---|
| 648 | 732 | |
|---|
| 649 | | - if (src_addr) { |
|---|
| 650 | | - if (src_addr->sa_family != dst_addr->sa_family) |
|---|
| 651 | | - return -EINVAL; |
|---|
| 733 | + might_sleep(); |
|---|
| 652 | 734 | |
|---|
| 653 | | - memcpy(src_in, src_addr, rdma_addr_size(src_addr)); |
|---|
| 654 | | - } else { |
|---|
| 655 | | - src_in->sa_family = dst_addr->sa_family; |
|---|
| 656 | | - } |
|---|
| 735 | + if (rec->roce.route_resolved) |
|---|
| 736 | + return 0; |
|---|
| 657 | 737 | |
|---|
| 658 | | - return addr_resolve(src_in, dst_addr, addr, false, 0); |
|---|
| 738 | + rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); |
|---|
| 739 | + rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); |
|---|
| 740 | + |
|---|
| 741 | + if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) |
|---|
| 742 | + return -EINVAL; |
|---|
| 743 | + |
|---|
| 744 | + if (!attr || !attr->ndev) |
|---|
| 745 | + return -EINVAL; |
|---|
| 746 | + |
|---|
| 747 | + dev_addr.net = &init_net; |
|---|
| 748 | + dev_addr.sgid_attr = attr; |
|---|
| 749 | + |
|---|
| 750 | + ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, |
|---|
| 751 | + &dev_addr, false, true, 0); |
|---|
| 752 | + if (ret) |
|---|
| 753 | + return ret; |
|---|
| 754 | + |
|---|
| 755 | + if ((dev_addr.network == RDMA_NETWORK_IPV4 || |
|---|
| 756 | + dev_addr.network == RDMA_NETWORK_IPV6) && |
|---|
| 757 | + rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) |
|---|
| 758 | + return -EINVAL; |
|---|
| 759 | + |
|---|
| 760 | + rec->roce.route_resolved = true; |
|---|
| 761 | + return 0; |
|---|
| 659 | 762 | } |
|---|
| 660 | 763 | |
|---|
| 764 | +/** |
|---|
| 765 | + * rdma_addr_cancel - Cancel resolve ip request |
|---|
| 766 | + * @addr: Pointer to address structure given previously |
|---|
| 767 | + * during rdma_resolve_ip(). |
|---|
| 768 | + * rdma_addr_cancel() is a synchronous function which cancels any pending |
|---|
| 769 | + * request if there is any. |
|---|
| 770 | + */ |
|---|
| 661 | 771 | void rdma_addr_cancel(struct rdma_dev_addr *addr) |
|---|
| 662 | 772 | { |
|---|
| 663 | 773 | struct addr_req *req, *temp_req; |
|---|
| .. | .. |
|---|
| 685 | 795 | * guarantees no work is running and none will be started. |
|---|
| 686 | 796 | */ |
|---|
| 687 | 797 | cancel_delayed_work_sync(&found->work); |
|---|
| 688 | | - |
|---|
| 689 | | - if (found->callback) |
|---|
| 690 | | - found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr, |
|---|
| 691 | | - found->addr, found->context); |
|---|
| 692 | | - |
|---|
| 693 | 798 | kfree(found); |
|---|
| 694 | 799 | } |
|---|
| 695 | 800 | EXPORT_SYMBOL(rdma_addr_cancel); |
|---|
| .. | .. |
|---|
| 708 | 813 | |
|---|
| 709 | 814 | int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, |
|---|
| 710 | 815 | const union ib_gid *dgid, |
|---|
| 711 | | - u8 *dmac, const struct net_device *ndev, |
|---|
| 816 | + u8 *dmac, const struct ib_gid_attr *sgid_attr, |
|---|
| 712 | 817 | int *hoplimit) |
|---|
| 713 | 818 | { |
|---|
| 714 | 819 | struct rdma_dev_addr dev_addr; |
|---|
| .. | .. |
|---|
| 723 | 828 | rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); |
|---|
| 724 | 829 | |
|---|
| 725 | 830 | memset(&dev_addr, 0, sizeof(dev_addr)); |
|---|
| 726 | | - dev_addr.bound_dev_if = ndev->ifindex; |
|---|
| 727 | 831 | dev_addr.net = &init_net; |
|---|
| 832 | + dev_addr.sgid_attr = sgid_attr; |
|---|
| 728 | 833 | |
|---|
| 729 | 834 | init_completion(&ctx.comp); |
|---|
| 730 | 835 | ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, |
|---|
| 731 | 836 | (struct sockaddr *)&dgid_addr, &dev_addr, 1000, |
|---|
| 732 | | - resolve_cb, &ctx); |
|---|
| 837 | + resolve_cb, true, &ctx); |
|---|
| 733 | 838 | if (ret) |
|---|
| 734 | 839 | return ret; |
|---|
| 735 | 840 | |
|---|