| .. | .. | 
|---|
| 42 | 42 | #include <net/neighbour.h> | 
|---|
| 43 | 43 | #include <net/route.h> | 
|---|
| 44 | 44 | #include <net/netevent.h> | 
|---|
| 45 |  | -#include <net/addrconf.h> | 
|---|
|  | 45 | +#include <net/ipv6_stubs.h> | 
|---|
| 46 | 46 | #include <net/ip6_route.h> | 
|---|
| 47 | 47 | #include <rdma/ib_addr.h> | 
|---|
|  | 48 | +#include <rdma/ib_cache.h> | 
|---|
|  | 49 | +#include <rdma/ib_sa.h> | 
|---|
| 48 | 50 | #include <rdma/ib.h> | 
|---|
| 49 | 51 | #include <rdma/rdma_netlink.h> | 
|---|
| 50 | 52 | #include <net/netlink.h> | 
|---|
| .. | .. | 
|---|
| 61 | 63 | struct rdma_dev_addr *addr, void *context); | 
|---|
| 62 | 64 | unsigned long timeout; | 
|---|
| 63 | 65 | struct delayed_work work; | 
|---|
|  | 66 | +	bool resolve_by_gid_attr;	/* Consider gid attr in resolve phase */ | 
|---|
| 64 | 67 | int status; | 
|---|
| 65 | 68 | u32 seq; | 
|---|
| 66 | 69 | }; | 
|---|
| .. | .. | 
|---|
| 73 | 76 |  | 
|---|
| 74 | 77 | static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { | 
|---|
| 75 | 78 | [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, | 
|---|
| 76 |  | -		.len = sizeof(struct rdma_nla_ls_gid)}, | 
|---|
|  | 79 | +		.len = sizeof(struct rdma_nla_ls_gid), | 
|---|
|  | 80 | +		.validation_type = NLA_VALIDATE_MIN, | 
|---|
|  | 81 | +		.min = sizeof(struct rdma_nla_ls_gid)}, | 
|---|
| 77 | 82 | }; | 
|---|
| 78 | 83 |  | 
|---|
| 79 | 84 | static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) | 
|---|
| .. | .. | 
|---|
| 84 | 89 | if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) | 
|---|
| 85 | 90 | return false; | 
|---|
| 86 | 91 |  | 
|---|
| 87 |  | -	ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), | 
|---|
| 88 |  | -			nlmsg_len(nlh), ib_nl_addr_policy, NULL); | 
|---|
|  | 92 | +	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), | 
|---|
|  | 93 | +				   nlmsg_len(nlh), ib_nl_addr_policy, NULL); | 
|---|
| 89 | 94 | if (ret) | 
|---|
| 90 | 95 | return false; | 
|---|
| 91 | 96 |  | 
|---|
| .. | .. | 
|---|
| 180 | 185 |  | 
|---|
| 181 | 186 | /* Repair the nlmsg header length */ | 
|---|
| 182 | 187 | nlmsg_end(skb, nlh); | 
|---|
| 183 |  | -	rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); | 
|---|
|  | 188 | +	rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); | 
|---|
| 184 | 189 |  | 
|---|
| 185 | 190 | /* Make the request retry, so when we get the response from userspace | 
|---|
| 186 | 191 | * we will have something. | 
|---|
| .. | .. | 
|---|
| 219 | 224 | } | 
|---|
| 220 | 225 | EXPORT_SYMBOL(rdma_addr_size_kss); | 
|---|
| 221 | 226 |  | 
|---|
| 222 |  | -void rdma_copy_addr(struct rdma_dev_addr *dev_addr, | 
|---|
| 223 |  | -		    const struct net_device *dev, | 
|---|
| 224 |  | -		    const unsigned char *dst_dev_addr) | 
|---|
|  | 227 | +/** | 
|---|
|  | 228 | + * rdma_copy_src_l2_addr - Copy netdevice source addresses | 
|---|
|  | 229 | + * @dev_addr:	Destination address pointer where to copy the addresses | 
|---|
|  | 230 | + * @dev:	Netdevice whose source addresses to copy | 
|---|
|  | 231 | + * | 
|---|
|  | 232 | + * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. | 
|---|
|  | 233 | + * This includes unicast address, broadcast address, device type and | 
|---|
|  | 234 | + * interface index. | 
|---|
|  | 235 | + */ | 
|---|
|  | 236 | +void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, | 
|---|
|  | 237 | +			   const struct net_device *dev) | 
|---|
| 225 | 238 | { | 
|---|
| 226 | 239 | dev_addr->dev_type = dev->type; | 
|---|
| 227 | 240 | memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); | 
|---|
| 228 | 241 | memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); | 
|---|
| 229 |  | -	if (dst_dev_addr) | 
|---|
| 230 |  | -		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); | 
|---|
| 231 | 242 | dev_addr->bound_dev_if = dev->ifindex; | 
|---|
| 232 | 243 | } | 
|---|
| 233 |  | -EXPORT_SYMBOL(rdma_copy_addr); | 
|---|
|  | 244 | +EXPORT_SYMBOL(rdma_copy_src_l2_addr); | 
|---|
|  | 245 | + | 
|---|
|  | 246 | +static struct net_device * | 
|---|
|  | 247 | +rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) | 
|---|
|  | 248 | +{ | 
|---|
|  | 249 | +	struct net_device *dev = NULL; | 
|---|
|  | 250 | +	int ret = -EADDRNOTAVAIL; | 
|---|
|  | 251 | + | 
|---|
|  | 252 | +	switch (src_in->sa_family) { | 
|---|
|  | 253 | +	case AF_INET: | 
|---|
|  | 254 | +		dev = __ip_dev_find(net, | 
|---|
|  | 255 | +				    ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, | 
|---|
|  | 256 | +				    false); | 
|---|
|  | 257 | +		if (dev) | 
|---|
|  | 258 | +			ret = 0; | 
|---|
|  | 259 | +		break; | 
|---|
|  | 260 | +#if IS_ENABLED(CONFIG_IPV6) | 
|---|
|  | 261 | +	case AF_INET6: | 
|---|
|  | 262 | +		for_each_netdev_rcu(net, dev) { | 
|---|
|  | 263 | +			if (ipv6_chk_addr(net, | 
|---|
|  | 264 | +					  &((const struct sockaddr_in6 *)src_in)->sin6_addr, | 
|---|
|  | 265 | +					  dev, 1)) { | 
|---|
|  | 266 | +				ret = 0; | 
|---|
|  | 267 | +				break; | 
|---|
|  | 268 | +			} | 
|---|
|  | 269 | +		} | 
|---|
|  | 270 | +		break; | 
|---|
|  | 271 | +#endif | 
|---|
|  | 272 | +	} | 
|---|
|  | 273 | +	return ret ? ERR_PTR(ret) : dev; | 
|---|
|  | 274 | +} | 
|---|
| 234 | 275 |  | 
|---|
| 235 | 276 | int rdma_translate_ip(const struct sockaddr *addr, | 
|---|
| 236 | 277 | struct rdma_dev_addr *dev_addr) | 
|---|
| .. | .. | 
|---|
| 241 | 282 | dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); | 
|---|
| 242 | 283 | if (!dev) | 
|---|
| 243 | 284 | return -ENODEV; | 
|---|
| 244 |  | -		rdma_copy_addr(dev_addr, dev, NULL); | 
|---|
|  | 285 | +		rdma_copy_src_l2_addr(dev_addr, dev); | 
|---|
| 245 | 286 | dev_put(dev); | 
|---|
| 246 | 287 | return 0; | 
|---|
| 247 | 288 | } | 
|---|
| 248 | 289 |  | 
|---|
| 249 |  | -	switch (addr->sa_family) { | 
|---|
| 250 |  | -	case AF_INET: | 
|---|
| 251 |  | -		dev = ip_dev_find(dev_addr->net, | 
|---|
| 252 |  | -			((const struct sockaddr_in *)addr)->sin_addr.s_addr); | 
|---|
| 253 |  | - | 
|---|
| 254 |  | -		if (!dev) | 
|---|
| 255 |  | -			return -EADDRNOTAVAIL; | 
|---|
| 256 |  | - | 
|---|
| 257 |  | -		rdma_copy_addr(dev_addr, dev, NULL); | 
|---|
| 258 |  | -		dev_put(dev); | 
|---|
| 259 |  | -		break; | 
|---|
| 260 |  | -#if IS_ENABLED(CONFIG_IPV6) | 
|---|
| 261 |  | -	case AF_INET6: | 
|---|
| 262 |  | -		rcu_read_lock(); | 
|---|
| 263 |  | -		for_each_netdev_rcu(dev_addr->net, dev) { | 
|---|
| 264 |  | -			if (ipv6_chk_addr(dev_addr->net, | 
|---|
| 265 |  | -					  &((const struct sockaddr_in6 *)addr)->sin6_addr, | 
|---|
| 266 |  | -					  dev, 1)) { | 
|---|
| 267 |  | -				rdma_copy_addr(dev_addr, dev, NULL); | 
|---|
| 268 |  | -				break; | 
|---|
| 269 |  | -			} | 
|---|
| 270 |  | -		} | 
|---|
| 271 |  | -		rcu_read_unlock(); | 
|---|
| 272 |  | -		break; | 
|---|
| 273 |  | -#endif | 
|---|
| 274 |  | -	} | 
|---|
| 275 |  | -	return 0; | 
|---|
|  | 290 | +	rcu_read_lock(); | 
|---|
|  | 291 | +	dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); | 
|---|
|  | 292 | +	if (!IS_ERR(dev)) | 
|---|
|  | 293 | +		rdma_copy_src_l2_addr(dev_addr, dev); | 
|---|
|  | 294 | +	rcu_read_unlock(); | 
|---|
|  | 295 | +	return PTR_ERR_OR_ZERO(dev); | 
|---|
| 276 | 296 | } | 
|---|
| 277 | 297 | EXPORT_SYMBOL(rdma_translate_ip); | 
|---|
| 278 | 298 |  | 
|---|
| .. | .. | 
|---|
| 295 | 315 | spin_unlock_bh(&lock); | 
|---|
| 296 | 316 | } | 
|---|
| 297 | 317 |  | 
|---|
| 298 |  | -static int ib_nl_fetch_ha(const struct dst_entry *dst, | 
|---|
| 299 |  | -			  struct rdma_dev_addr *dev_addr, | 
|---|
|  | 318 | +static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, | 
|---|
| 300 | 319 | const void *daddr, u32 seq, u16 family) | 
|---|
| 301 | 320 | { | 
|---|
| 302 |  | -	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) | 
|---|
|  | 321 | +	if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) | 
|---|
| 303 | 322 | return -EADDRNOTAVAIL; | 
|---|
| 304 | 323 |  | 
|---|
| 305 |  | -	/* We fill in what we can, the response will fill the rest */ | 
|---|
| 306 |  | -	rdma_copy_addr(dev_addr, dst->dev, NULL); | 
|---|
| 307 | 324 | return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); | 
|---|
| 308 | 325 | } | 
|---|
| 309 | 326 |  | 
|---|
| .. | .. | 
|---|
| 322 | 339 | neigh_event_send(n, NULL); | 
|---|
| 323 | 340 | ret = -ENODATA; | 
|---|
| 324 | 341 | } else { | 
|---|
| 325 |  | -		rdma_copy_addr(dev_addr, dst->dev, n->ha); | 
|---|
|  | 342 | +		neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); | 
|---|
| 326 | 343 | } | 
|---|
| 327 | 344 |  | 
|---|
| 328 | 345 | neigh_release(n); | 
|---|
| .. | .. | 
|---|
| 356 | 373 | (const void *)&dst_in6->sin6_addr; | 
|---|
| 357 | 374 | sa_family_t family = dst_in->sa_family; | 
|---|
| 358 | 375 |  | 
|---|
| 359 |  | -	/* Gateway + ARPHRD_INFINIBAND -> IB router */ | 
|---|
| 360 |  | -	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) | 
|---|
| 361 |  | -		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); | 
|---|
|  | 376 | +	might_sleep(); | 
|---|
|  | 377 | + | 
|---|
|  | 378 | +	/* If we have a gateway in IB mode then it must be an IB network */ | 
|---|
|  | 379 | +	if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) | 
|---|
|  | 380 | +		return ib_nl_fetch_ha(dev_addr, daddr, seq, family); | 
|---|
| 362 | 381 | else | 
|---|
| 363 | 382 | return dst_fetch_ha(dst, dev_addr, daddr); | 
|---|
| 364 | 383 | } | 
|---|
| 365 | 384 |  | 
|---|
| 366 |  | -static int addr4_resolve(struct sockaddr_in *src_in, | 
|---|
| 367 |  | -			 const struct sockaddr_in *dst_in, | 
|---|
|  | 385 | +static int addr4_resolve(struct sockaddr *src_sock, | 
|---|
|  | 386 | +			 const struct sockaddr *dst_sock, | 
|---|
| 368 | 387 | struct rdma_dev_addr *addr, | 
|---|
| 369 | 388 | struct rtable **prt) | 
|---|
| 370 | 389 | { | 
|---|
|  | 390 | +	struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; | 
|---|
|  | 391 | +	const struct sockaddr_in *dst_in = | 
|---|
|  | 392 | +			(const struct sockaddr_in *)dst_sock; | 
|---|
|  | 393 | + | 
|---|
| 371 | 394 | __be32 src_ip = src_in->sin_addr.s_addr; | 
|---|
| 372 | 395 | __be32 dst_ip = dst_in->sin_addr.s_addr; | 
|---|
| 373 | 396 | struct rtable *rt; | 
|---|
| .. | .. | 
|---|
| 383 | 406 | if (ret) | 
|---|
| 384 | 407 | return ret; | 
|---|
| 385 | 408 |  | 
|---|
| 386 |  | -	src_in->sin_family = AF_INET; | 
|---|
| 387 | 409 | src_in->sin_addr.s_addr = fl4.saddr; | 
|---|
| 388 |  | - | 
|---|
| 389 |  | -	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're | 
|---|
| 390 |  | -	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | 
|---|
| 391 |  | -	 * type accordingly. | 
|---|
| 392 |  | -	 */ | 
|---|
| 393 |  | -	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) | 
|---|
| 394 |  | -		addr->network = RDMA_NETWORK_IPV4; | 
|---|
| 395 | 410 |  | 
|---|
| 396 | 411 | addr->hoplimit = ip4_dst_hoplimit(&rt->dst); | 
|---|
| 397 | 412 |  | 
|---|
| .. | .. | 
|---|
| 400 | 415 | } | 
|---|
| 401 | 416 |  | 
|---|
| 402 | 417 | #if IS_ENABLED(CONFIG_IPV6) | 
|---|
| 403 |  | -static int addr6_resolve(struct sockaddr_in6 *src_in, | 
|---|
| 404 |  | -			 const struct sockaddr_in6 *dst_in, | 
|---|
|  | 418 | +static int addr6_resolve(struct sockaddr *src_sock, | 
|---|
|  | 419 | +			 const struct sockaddr *dst_sock, | 
|---|
| 405 | 420 | struct rdma_dev_addr *addr, | 
|---|
| 406 | 421 | struct dst_entry **pdst) | 
|---|
| 407 | 422 | { | 
|---|
|  | 423 | +	struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; | 
|---|
|  | 424 | +	const struct sockaddr_in6 *dst_in = | 
|---|
|  | 425 | +				(const struct sockaddr_in6 *)dst_sock; | 
|---|
| 408 | 426 | struct flowi6 fl6; | 
|---|
| 409 | 427 | struct dst_entry *dst; | 
|---|
| 410 |  | -	struct rt6_info *rt; | 
|---|
| 411 | 428 |  | 
|---|
| 412 | 429 | memset(&fl6, 0, sizeof fl6); | 
|---|
| 413 | 430 | fl6.daddr = dst_in->sin6_addr; | 
|---|
| .. | .. | 
|---|
| 418 | 435 | if (IS_ERR(dst)) | 
|---|
| 419 | 436 | return PTR_ERR(dst); | 
|---|
| 420 | 437 |  | 
|---|
| 421 |  | -	rt = (struct rt6_info *)dst; | 
|---|
| 422 |  | -	if (ipv6_addr_any(&src_in->sin6_addr)) { | 
|---|
| 423 |  | -		src_in->sin6_family = AF_INET6; | 
|---|
|  | 438 | +	if (ipv6_addr_any(&src_in->sin6_addr)) | 
|---|
| 424 | 439 | src_in->sin6_addr = fl6.saddr; | 
|---|
| 425 |  | -	} | 
|---|
| 426 |  | - | 
|---|
| 427 |  | -	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're | 
|---|
| 428 |  | -	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network | 
|---|
| 429 |  | -	 * type accordingly. | 
|---|
| 430 |  | -	 */ | 
|---|
| 431 |  | -	if (rt->rt6i_flags & RTF_GATEWAY && | 
|---|
| 432 |  | -	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) | 
|---|
| 433 |  | -		addr->network = RDMA_NETWORK_IPV6; | 
|---|
| 434 | 440 |  | 
|---|
| 435 | 441 | addr->hoplimit = ip6_dst_hoplimit(dst); | 
|---|
| 436 | 442 |  | 
|---|
| .. | .. | 
|---|
| 438 | 444 | return 0; | 
|---|
| 439 | 445 | } | 
|---|
| 440 | 446 | #else | 
|---|
| 441 |  | -static int addr6_resolve(struct sockaddr_in6 *src_in, | 
|---|
| 442 |  | -			 const struct sockaddr_in6 *dst_in, | 
|---|
|  | 447 | +static int addr6_resolve(struct sockaddr *src_sock, | 
|---|
|  | 448 | +			 const struct sockaddr *dst_sock, | 
|---|
| 443 | 449 | struct rdma_dev_addr *addr, | 
|---|
| 444 | 450 | struct dst_entry **pdst) | 
|---|
| 445 | 451 | { | 
|---|
| .. | .. | 
|---|
| 450 | 456 | static int addr_resolve_neigh(const struct dst_entry *dst, | 
|---|
| 451 | 457 | const struct sockaddr *dst_in, | 
|---|
| 452 | 458 | struct rdma_dev_addr *addr, | 
|---|
|  | 459 | +			      unsigned int ndev_flags, | 
|---|
| 453 | 460 | u32 seq) | 
|---|
| 454 | 461 | { | 
|---|
| 455 |  | -	if (dst->dev->flags & IFF_LOOPBACK) { | 
|---|
| 456 |  | -		int ret; | 
|---|
|  | 462 | +	int ret = 0; | 
|---|
| 457 | 463 |  | 
|---|
| 458 |  | -		ret = rdma_translate_ip(dst_in, addr); | 
|---|
| 459 |  | -		if (!ret) | 
|---|
| 460 |  | -			memcpy(addr->dst_dev_addr, addr->src_dev_addr, | 
|---|
| 461 |  | -			       MAX_ADDR_LEN); | 
|---|
|  | 464 | +	if (ndev_flags & IFF_LOOPBACK) { | 
|---|
|  | 465 | +		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); | 
|---|
|  | 466 | +	} else { | 
|---|
|  | 467 | +		if (!(ndev_flags & IFF_NOARP)) { | 
|---|
|  | 468 | +			/* If the device doesn't do ARP internally */ | 
|---|
|  | 469 | +			ret = fetch_ha(dst, addr, dst_in, seq); | 
|---|
|  | 470 | +		} | 
|---|
|  | 471 | +	} | 
|---|
|  | 472 | +	return ret; | 
|---|
|  | 473 | +} | 
|---|
| 462 | 474 |  | 
|---|
| 463 |  | -		return ret; | 
|---|
|  | 475 | +static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, | 
|---|
|  | 476 | +			    const struct sockaddr *dst_in, | 
|---|
|  | 477 | +			    const struct dst_entry *dst, | 
|---|
|  | 478 | +			    const struct net_device *ndev) | 
|---|
|  | 479 | +{ | 
|---|
|  | 480 | +	int ret = 0; | 
|---|
|  | 481 | + | 
|---|
|  | 482 | +	if (dst->dev->flags & IFF_LOOPBACK) | 
|---|
|  | 483 | +		ret = rdma_translate_ip(dst_in, dev_addr); | 
|---|
|  | 484 | +	else | 
|---|
|  | 485 | +		rdma_copy_src_l2_addr(dev_addr, dst->dev); | 
|---|
|  | 486 | + | 
|---|
|  | 487 | +	/* | 
|---|
|  | 488 | +	 * If there's a gateway and type of device not ARPHRD_INFINIBAND, | 
|---|
|  | 489 | +	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the | 
|---|
|  | 490 | +	 * network type accordingly. | 
|---|
|  | 491 | +	 */ | 
|---|
|  | 492 | +	if (has_gateway(dst, dst_in->sa_family) && | 
|---|
|  | 493 | +	    ndev->type != ARPHRD_INFINIBAND) | 
|---|
|  | 494 | +		dev_addr->network = dst_in->sa_family == AF_INET ? | 
|---|
|  | 495 | +						RDMA_NETWORK_IPV4 : | 
|---|
|  | 496 | +						RDMA_NETWORK_IPV6; | 
|---|
|  | 497 | +	else | 
|---|
|  | 498 | +		dev_addr->network = RDMA_NETWORK_IB; | 
|---|
|  | 499 | + | 
|---|
|  | 500 | +	return ret; | 
|---|
|  | 501 | +} | 
|---|
|  | 502 | + | 
|---|
|  | 503 | +static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, | 
|---|
|  | 504 | +				 unsigned int *ndev_flags, | 
|---|
|  | 505 | +				 const struct sockaddr *dst_in, | 
|---|
|  | 506 | +				 const struct dst_entry *dst) | 
|---|
|  | 507 | +{ | 
|---|
|  | 508 | +	struct net_device *ndev = READ_ONCE(dst->dev); | 
|---|
|  | 509 | + | 
|---|
|  | 510 | +	*ndev_flags = ndev->flags; | 
|---|
|  | 511 | +	/* A physical device must be the RDMA device to use */ | 
|---|
|  | 512 | +	if (ndev->flags & IFF_LOOPBACK) { | 
|---|
|  | 513 | +		/* | 
|---|
|  | 514 | +		 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or | 
|---|
|  | 515 | +		 * loopback IP address. So if route is resolved to loopback | 
|---|
|  | 516 | +		 * interface, translate that to a real ndev based on non | 
|---|
|  | 517 | +		 * loopback IP address. | 
|---|
|  | 518 | +		 */ | 
|---|
|  | 519 | +		ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); | 
|---|
|  | 520 | +		if (IS_ERR(ndev)) | 
|---|
|  | 521 | +			return -ENODEV; | 
|---|
| 464 | 522 | } | 
|---|
| 465 | 523 |  | 
|---|
| 466 |  | -	/* If the device doesn't do ARP internally */ | 
|---|
| 467 |  | -	if (!(dst->dev->flags & IFF_NOARP)) | 
|---|
| 468 |  | -		return fetch_ha(dst, addr, dst_in, seq); | 
|---|
|  | 524 | +	return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); | 
|---|
|  | 525 | +} | 
|---|
| 469 | 526 |  | 
|---|
| 470 |  | -	rdma_copy_addr(addr, dst->dev, NULL); | 
|---|
|  | 527 | +static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) | 
|---|
|  | 528 | +{ | 
|---|
|  | 529 | +	struct net_device *ndev; | 
|---|
| 471 | 530 |  | 
|---|
|  | 531 | +	ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); | 
|---|
|  | 532 | +	if (IS_ERR(ndev)) | 
|---|
|  | 533 | +		return PTR_ERR(ndev); | 
|---|
|  | 534 | + | 
|---|
|  | 535 | +	/* | 
|---|
|  | 536 | +	 * Since we are holding the rcu, reading net and ifindex | 
|---|
|  | 537 | +	 * are safe without any additional reference; because | 
|---|
|  | 538 | +	 * change_net_namespace() in net/core/dev.c does rcu sync | 
|---|
|  | 539 | +	 * after it changes the state to IFF_DOWN and before | 
|---|
|  | 540 | +	 * updating netdev fields {net, ifindex}. | 
|---|
|  | 541 | +	 */ | 
|---|
|  | 542 | +	addr->net = dev_net(ndev); | 
|---|
|  | 543 | +	addr->bound_dev_if = ndev->ifindex; | 
|---|
| 472 | 544 | return 0; | 
|---|
|  | 545 | +} | 
|---|
|  | 546 | + | 
|---|
|  | 547 | +static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) | 
|---|
|  | 548 | +{ | 
|---|
|  | 549 | +	addr->net = &init_net; | 
|---|
|  | 550 | +	addr->bound_dev_if = 0; | 
|---|
| 473 | 551 | } | 
|---|
| 474 | 552 |  | 
|---|
| 475 | 553 | static int addr_resolve(struct sockaddr *src_in, | 
|---|
| 476 | 554 | const struct sockaddr *dst_in, | 
|---|
| 477 | 555 | struct rdma_dev_addr *addr, | 
|---|
| 478 | 556 | bool resolve_neigh, | 
|---|
|  | 557 | +			bool resolve_by_gid_attr, | 
|---|
| 479 | 558 | u32 seq) | 
|---|
| 480 | 559 | { | 
|---|
| 481 |  | -	struct net_device *ndev; | 
|---|
| 482 |  | -	struct dst_entry *dst; | 
|---|
|  | 560 | +	struct dst_entry *dst = NULL; | 
|---|
|  | 561 | +	unsigned int ndev_flags = 0; | 
|---|
|  | 562 | +	struct rtable *rt = NULL; | 
|---|
| 483 | 563 | int ret; | 
|---|
| 484 | 564 |  | 
|---|
| 485 | 565 | if (!addr->net) { | 
|---|
| .. | .. | 
|---|
| 487 | 567 | return -EINVAL; | 
|---|
| 488 | 568 | } | 
|---|
| 489 | 569 |  | 
|---|
|  | 570 | +	rcu_read_lock(); | 
|---|
|  | 571 | +	if (resolve_by_gid_attr) { | 
|---|
|  | 572 | +		if (!addr->sgid_attr) { | 
|---|
|  | 573 | +			rcu_read_unlock(); | 
|---|
|  | 574 | +			pr_warn_ratelimited("%s: missing gid_attr\n", __func__); | 
|---|
|  | 575 | +			return -EINVAL; | 
|---|
|  | 576 | +		} | 
|---|
|  | 577 | +		/* | 
|---|
|  | 578 | +		 * If the request is for a specific gid attribute of the | 
|---|
|  | 579 | +		 * rdma_dev_addr, derive net from the netdevice of the | 
|---|
|  | 580 | +		 * GID attribute. | 
|---|
|  | 581 | +		 */ | 
|---|
|  | 582 | +		ret = set_addr_netns_by_gid_rcu(addr); | 
|---|
|  | 583 | +		if (ret) { | 
|---|
|  | 584 | +			rcu_read_unlock(); | 
|---|
|  | 585 | +			return ret; | 
|---|
|  | 586 | +		} | 
|---|
|  | 587 | +	} | 
|---|
| 490 | 588 | if (src_in->sa_family == AF_INET) { | 
|---|
| 491 |  | -		struct rtable *rt = NULL; | 
|---|
| 492 |  | -		const struct sockaddr_in *dst_in4 = | 
|---|
| 493 |  | -			(const struct sockaddr_in *)dst_in; | 
|---|
| 494 |  | - | 
|---|
| 495 |  | -		ret = addr4_resolve((struct sockaddr_in *)src_in, | 
|---|
| 496 |  | -				    dst_in4, addr, &rt); | 
|---|
| 497 |  | -		if (ret) | 
|---|
| 498 |  | -			return ret; | 
|---|
| 499 |  | - | 
|---|
| 500 |  | -		if (resolve_neigh) | 
|---|
| 501 |  | -			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); | 
|---|
| 502 |  | - | 
|---|
| 503 |  | -		if (addr->bound_dev_if) { | 
|---|
| 504 |  | -			ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | 
|---|
| 505 |  | -		} else { | 
|---|
| 506 |  | -			ndev = rt->dst.dev; | 
|---|
| 507 |  | -			dev_hold(ndev); | 
|---|
| 508 |  | -		} | 
|---|
| 509 |  | - | 
|---|
| 510 |  | -		ip_rt_put(rt); | 
|---|
|  | 589 | +		ret = addr4_resolve(src_in, dst_in, addr, &rt); | 
|---|
|  | 590 | +		dst = &rt->dst; | 
|---|
| 511 | 591 | } else { | 
|---|
| 512 |  | -		const struct sockaddr_in6 *dst_in6 = | 
|---|
| 513 |  | -			(const struct sockaddr_in6 *)dst_in; | 
|---|
|  | 592 | +		ret = addr6_resolve(src_in, dst_in, addr, &dst); | 
|---|
|  | 593 | +	} | 
|---|
|  | 594 | +	if (ret) { | 
|---|
|  | 595 | +		rcu_read_unlock(); | 
|---|
|  | 596 | +		goto done; | 
|---|
|  | 597 | +	} | 
|---|
|  | 598 | +	ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); | 
|---|
|  | 599 | +	rcu_read_unlock(); | 
|---|
| 514 | 600 |  | 
|---|
| 515 |  | -		ret = addr6_resolve((struct sockaddr_in6 *)src_in, | 
|---|
| 516 |  | -				    dst_in6, addr, | 
|---|
| 517 |  | -				    &dst); | 
|---|
| 518 |  | -		if (ret) | 
|---|
| 519 |  | -			return ret; | 
|---|
|  | 601 | +	/* | 
|---|
|  | 602 | +	 * Resolve neighbor destination address if requested and | 
|---|
|  | 603 | +	 * only if src addr translation didn't fail. | 
|---|
|  | 604 | +	 */ | 
|---|
|  | 605 | +	if (!ret && resolve_neigh) | 
|---|
|  | 606 | +		ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); | 
|---|
| 520 | 607 |  | 
|---|
| 521 |  | -		if (resolve_neigh) | 
|---|
| 522 |  | -			ret = addr_resolve_neigh(dst, dst_in, addr, seq); | 
|---|
| 523 |  | - | 
|---|
| 524 |  | -		if (addr->bound_dev_if) { | 
|---|
| 525 |  | -			ndev = dev_get_by_index(addr->net, addr->bound_dev_if); | 
|---|
| 526 |  | -		} else { | 
|---|
| 527 |  | -			ndev = dst->dev; | 
|---|
| 528 |  | -			dev_hold(ndev); | 
|---|
| 529 |  | -		} | 
|---|
| 530 |  | - | 
|---|
|  | 608 | +	if (src_in->sa_family == AF_INET) | 
|---|
|  | 609 | +		ip_rt_put(rt); | 
|---|
|  | 610 | +	else | 
|---|
| 531 | 611 | dst_release(dst); | 
|---|
| 532 |  | -	} | 
|---|
| 533 |  | - | 
|---|
| 534 |  | -	if (ndev) { | 
|---|
| 535 |  | -		if (ndev->flags & IFF_LOOPBACK) | 
|---|
| 536 |  | -			ret = rdma_translate_ip(dst_in, addr); | 
|---|
| 537 |  | -		else | 
|---|
| 538 |  | -			addr->bound_dev_if = ndev->ifindex; | 
|---|
| 539 |  | -		dev_put(ndev); | 
|---|
| 540 |  | -	} | 
|---|
| 541 |  | - | 
|---|
|  | 612 | +done: | 
|---|
|  | 613 | +	/* | 
|---|
|  | 614 | +	 * Clear the addr net to go back to its original state, only if it was | 
|---|
|  | 615 | +	 * derived from GID attribute in this context. | 
|---|
|  | 616 | +	 */ | 
|---|
|  | 617 | +	if (resolve_by_gid_attr) | 
|---|
|  | 618 | +		rdma_addr_set_net_defaults(addr); | 
|---|
| 542 | 619 | return ret; | 
|---|
| 543 | 620 | } | 
|---|
| 544 | 621 |  | 
|---|
| .. | .. | 
|---|
| 553 | 630 | src_in = (struct sockaddr *)&req->src_addr; | 
|---|
| 554 | 631 | dst_in = (struct sockaddr *)&req->dst_addr; | 
|---|
| 555 | 632 | req->status = addr_resolve(src_in, dst_in, req->addr, | 
|---|
| 556 |  | -					   true, req->seq); | 
|---|
|  | 633 | +					   true, req->resolve_by_gid_attr, | 
|---|
|  | 634 | +					   req->seq); | 
|---|
| 557 | 635 | if (req->status && time_after_eq(jiffies, req->timeout)) { | 
|---|
| 558 | 636 | req->status = -ETIMEDOUT; | 
|---|
| 559 | 637 | } else if (req->status == -ENODATA) { | 
|---|
| .. | .. | 
|---|
| 584 | 662 | } | 
|---|
| 585 | 663 |  | 
|---|
| 586 | 664 | int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, | 
|---|
| 587 |  | -		    struct rdma_dev_addr *addr, int timeout_ms, | 
|---|
|  | 665 | +		    struct rdma_dev_addr *addr, unsigned long timeout_ms, | 
|---|
| 588 | 666 | void (*callback)(int status, struct sockaddr *src_addr, | 
|---|
| 589 | 667 | struct rdma_dev_addr *addr, void *context), | 
|---|
| 590 |  | -		    void *context) | 
|---|
|  | 668 | +		    bool resolve_by_gid_attr, void *context) | 
|---|
| 591 | 669 | { | 
|---|
| 592 | 670 | struct sockaddr *src_in, *dst_in; | 
|---|
| 593 | 671 | struct addr_req *req; | 
|---|
| .. | .. | 
|---|
| 615 | 693 | req->addr = addr; | 
|---|
| 616 | 694 | req->callback = callback; | 
|---|
| 617 | 695 | req->context = context; | 
|---|
|  | 696 | +	req->resolve_by_gid_attr = resolve_by_gid_attr; | 
|---|
| 618 | 697 | INIT_DELAYED_WORK(&req->work, process_one_req); | 
|---|
| 619 | 698 | req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); | 
|---|
| 620 | 699 |  | 
|---|
| 621 |  | -	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); | 
|---|
|  | 700 | +	req->status = addr_resolve(src_in, dst_in, addr, true, | 
|---|
|  | 701 | +				   req->resolve_by_gid_attr, req->seq); | 
|---|
| 622 | 702 | switch (req->status) { | 
|---|
| 623 | 703 | case 0: | 
|---|
| 624 | 704 | req->timeout = jiffies; | 
|---|
| .. | .. | 
|---|
| 639 | 719 | } | 
|---|
| 640 | 720 | EXPORT_SYMBOL(rdma_resolve_ip); | 
|---|
| 641 | 721 |  | 
|---|
| 642 |  | -int rdma_resolve_ip_route(struct sockaddr *src_addr, | 
|---|
| 643 |  | -			  const struct sockaddr *dst_addr, | 
|---|
| 644 |  | -			  struct rdma_dev_addr *addr) | 
|---|
|  | 722 | +int roce_resolve_route_from_path(struct sa_path_rec *rec, | 
|---|
|  | 723 | +				 const struct ib_gid_attr *attr) | 
|---|
| 645 | 724 | { | 
|---|
| 646 |  | -	struct sockaddr_storage ssrc_addr = {}; | 
|---|
| 647 |  | -	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; | 
|---|
|  | 725 | +	union { | 
|---|
|  | 726 | +		struct sockaddr     _sockaddr; | 
|---|
|  | 727 | +		struct sockaddr_in  _sockaddr_in; | 
|---|
|  | 728 | +		struct sockaddr_in6 _sockaddr_in6; | 
|---|
|  | 729 | +	} sgid, dgid; | 
|---|
|  | 730 | +	struct rdma_dev_addr dev_addr = {}; | 
|---|
|  | 731 | +	int ret; | 
|---|
| 648 | 732 |  | 
|---|
| 649 |  | -	if (src_addr) { | 
|---|
| 650 |  | -		if (src_addr->sa_family != dst_addr->sa_family) | 
|---|
| 651 |  | -			return -EINVAL; | 
|---|
|  | 733 | +	might_sleep(); | 
|---|
| 652 | 734 |  | 
|---|
| 653 |  | -		memcpy(src_in, src_addr, rdma_addr_size(src_addr)); | 
|---|
| 654 |  | -	} else { | 
|---|
| 655 |  | -		src_in->sa_family = dst_addr->sa_family; | 
|---|
| 656 |  | -	} | 
|---|
|  | 735 | +	if (rec->roce.route_resolved) | 
|---|
|  | 736 | +		return 0; | 
|---|
| 657 | 737 |  | 
|---|
| 658 |  | -	return addr_resolve(src_in, dst_addr, addr, false, 0); | 
|---|
|  | 738 | +	rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); | 
|---|
|  | 739 | +	rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); | 
|---|
|  | 740 | + | 
|---|
|  | 741 | +	if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) | 
|---|
|  | 742 | +		return -EINVAL; | 
|---|
|  | 743 | + | 
|---|
|  | 744 | +	if (!attr || !attr->ndev) | 
|---|
|  | 745 | +		return -EINVAL; | 
|---|
|  | 746 | + | 
|---|
|  | 747 | +	dev_addr.net = &init_net; | 
|---|
|  | 748 | +	dev_addr.sgid_attr = attr; | 
|---|
|  | 749 | + | 
|---|
|  | 750 | +	ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, | 
|---|
|  | 751 | +			   &dev_addr, false, true, 0); | 
|---|
|  | 752 | +	if (ret) | 
|---|
|  | 753 | +		return ret; | 
|---|
|  | 754 | + | 
|---|
|  | 755 | +	if ((dev_addr.network == RDMA_NETWORK_IPV4 || | 
|---|
|  | 756 | +	     dev_addr.network == RDMA_NETWORK_IPV6) && | 
|---|
|  | 757 | +	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) | 
|---|
|  | 758 | +		return -EINVAL; | 
|---|
|  | 759 | + | 
|---|
|  | 760 | +	rec->roce.route_resolved = true; | 
|---|
|  | 761 | +	return 0; | 
|---|
| 659 | 762 | } | 
|---|
| 660 | 763 |  | 
|---|
|  | 764 | +/** | 
|---|
|  | 765 | + * rdma_addr_cancel - Cancel resolve ip request | 
|---|
|  | 766 | + * @addr:	Pointer to address structure given previously | 
|---|
|  | 767 | + *		during rdma_resolve_ip(). | 
|---|
|  | 768 | + * rdma_addr_cancel() is a synchronous function which cancels any pending | 
|---|
|  | 769 | + * request if there is any. | 
|---|
|  | 770 | + */ | 
|---|
| 661 | 771 | void rdma_addr_cancel(struct rdma_dev_addr *addr) | 
|---|
| 662 | 772 | { | 
|---|
| 663 | 773 | struct addr_req *req, *temp_req; | 
|---|
| .. | .. | 
|---|
| 685 | 795 | * guarantees no work is running and none will be started. | 
|---|
| 686 | 796 | */ | 
|---|
| 687 | 797 | cancel_delayed_work_sync(&found->work); | 
|---|
| 688 |  | - | 
|---|
| 689 |  | -	if (found->callback) | 
|---|
| 690 |  | -		found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr, | 
|---|
| 691 |  | -			      found->addr, found->context); | 
|---|
| 692 |  | - | 
|---|
| 693 | 798 | kfree(found); | 
|---|
| 694 | 799 | } | 
|---|
| 695 | 800 | EXPORT_SYMBOL(rdma_addr_cancel); | 
|---|
| .. | .. | 
|---|
| 708 | 813 |  | 
|---|
| 709 | 814 | int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, | 
|---|
| 710 | 815 | const union ib_gid *dgid, | 
|---|
| 711 |  | -				 u8 *dmac, const struct net_device *ndev, | 
|---|
|  | 816 | +				 u8 *dmac, const struct ib_gid_attr *sgid_attr, | 
|---|
| 712 | 817 | int *hoplimit) | 
|---|
| 713 | 818 | { | 
|---|
| 714 | 819 | struct rdma_dev_addr dev_addr; | 
|---|
| .. | .. | 
|---|
| 723 | 828 | rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); | 
|---|
| 724 | 829 |  | 
|---|
| 725 | 830 | memset(&dev_addr, 0, sizeof(dev_addr)); | 
|---|
| 726 |  | -	dev_addr.bound_dev_if = ndev->ifindex; | 
|---|
| 727 | 831 | dev_addr.net = &init_net; | 
|---|
|  | 832 | +	dev_addr.sgid_attr = sgid_attr; | 
|---|
| 728 | 833 |  | 
|---|
| 729 | 834 | init_completion(&ctx.comp); | 
|---|
| 730 | 835 | ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, | 
|---|
| 731 | 836 | (struct sockaddr *)&dgid_addr, &dev_addr, 1000, | 
|---|
| 732 |  | -			      resolve_cb, &ctx); | 
|---|
|  | 837 | +			      resolve_cb, true, &ctx); | 
|---|
| 733 | 838 | if (ret) | 
|---|
| 734 | 839 | return ret; | 
|---|
| 735 | 840 |  | 
|---|