| .. | .. | 
|---|
 | 1 | +// SPDX-License-Identifier: GPL-2.0-only  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 | 3 |   * INET		An implementation of the TCP/IP protocol suite for the LINUX | 
|---|
| 3 | 4 |   *		operating system.  INET is implemented using the  BSD Socket | 
|---|
| .. | .. | 
|---|
| 179 | 180 |  			 * Oh well... nobody has a sufficient solution to this | 
|---|
| 180 | 181 |  			 * protocol bug yet. | 
|---|
| 181 | 182 |  			 */ | 
|---|
| 182 |  | -			if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {  | 
|---|
 | 183 | +			if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {  | 
|---|
| 183 | 184 |  kill: | 
|---|
| 184 | 185 |  				inet_twsk_deschedule_put(tw); | 
|---|
| 185 | 186 |  				return TCP_TW_SUCCESS; | 
|---|
| .. | .. | 
|---|
| 265 | 266 |   | 
|---|
| 266 | 267 |  		tw->tw_transparent	= inet->transparent; | 
|---|
| 267 | 268 |  		tw->tw_mark		= sk->sk_mark; | 
|---|
 | 269 | +		tw->tw_priority		= sk->sk_priority;  | 
|---|
| 268 | 270 |  		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale; | 
|---|
| 269 | 271 |  		tcptw->tw_rcv_nxt	= tp->rcv_nxt; | 
|---|
| 270 | 272 |  		tcptw->tw_snd_nxt	= tp->snd_nxt; | 
|---|
| .. | .. | 
|---|
| 273 | 275 |  		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; | 
|---|
| 274 | 276 |  		tcptw->tw_ts_offset	= tp->tsoffset; | 
|---|
| 275 | 277 |  		tcptw->tw_last_oow_ack_time = 0; | 
|---|
| 276 |  | -  | 
|---|
 | 278 | +		tcptw->tw_tx_delay	= tp->tcp_tx_delay;  | 
|---|
| 277 | 279 |  #if IS_ENABLED(CONFIG_IPV6) | 
|---|
| 278 | 280 |  		if (tw->tw_family == PF_INET6) { | 
|---|
| 279 | 281 |  			struct ipv6_pinfo *np = inet6_sk(sk); | 
|---|
| .. | .. | 
|---|
| 282 | 284 |  			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; | 
|---|
| 283 | 285 |  			tw->tw_tclass = np->tclass; | 
|---|
| 284 | 286 |  			tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); | 
|---|
 | 287 | +			tw->tw_txhash = sk->sk_txhash;  | 
|---|
| 285 | 288 |  			tw->tw_ipv6only = sk->sk_ipv6only; | 
|---|
| 286 | 289 |  		} | 
|---|
| 287 | 290 |  #endif | 
|---|
| .. | .. | 
|---|
| 294 | 297 |  		 * so the timewait ack generating code has the key. | 
|---|
| 295 | 298 |  		 */ | 
|---|
| 296 | 299 |  		do { | 
|---|
| 297 |  | -			struct tcp_md5sig_key *key;  | 
|---|
| 298 | 300 |  			tcptw->tw_md5_key = NULL; | 
|---|
| 299 |  | -			key = tp->af_specific->md5_lookup(sk, sk);  | 
|---|
| 300 |  | -			if (key) {  | 
|---|
| 301 |  | -				tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);  | 
|---|
| 302 |  | -				BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());  | 
|---|
 | 301 | +			if (static_branch_unlikely(&tcp_md5_needed)) {  | 
|---|
 | 302 | +				struct tcp_md5sig_key *key;  | 
|---|
 | 303 | +  | 
|---|
 | 304 | +				key = tp->af_specific->md5_lookup(sk, sk);  | 
|---|
 | 305 | +				if (key) {  | 
|---|
 | 306 | +					tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);  | 
|---|
 | 307 | +					BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());  | 
|---|
 | 308 | +				}  | 
|---|
| 303 | 309 |  			} | 
|---|
| 304 | 310 |  		} while (0); | 
|---|
| 305 | 311 |  #endif | 
|---|
| .. | .. | 
|---|
| 338 | 344 |  void tcp_twsk_destructor(struct sock *sk) | 
|---|
| 339 | 345 |  { | 
|---|
| 340 | 346 |  #ifdef CONFIG_TCP_MD5SIG | 
|---|
| 341 |  | -	struct tcp_timewait_sock *twsk = tcp_twsk(sk);  | 
|---|
 | 347 | +	if (static_branch_unlikely(&tcp_md5_needed)) {  | 
|---|
 | 348 | +		struct tcp_timewait_sock *twsk = tcp_twsk(sk);  | 
|---|
| 342 | 349 |   | 
|---|
| 343 |  | -	if (twsk->tw_md5_key)  | 
|---|
| 344 |  | -		kfree_rcu(twsk->tw_md5_key, rcu);  | 
|---|
 | 350 | +		if (twsk->tw_md5_key)  | 
|---|
 | 351 | +			kfree_rcu(twsk->tw_md5_key, rcu);  | 
|---|
 | 352 | +	}  | 
|---|
| 345 | 353 |  #endif | 
|---|
| 346 | 354 |  } | 
|---|
| 347 | 355 |  EXPORT_SYMBOL_GPL(tcp_twsk_destructor); | 
|---|
| .. | .. | 
|---|
| 406 | 414 |   | 
|---|
| 407 | 415 |  		rcu_read_lock(); | 
|---|
| 408 | 416 |  		ca = tcp_ca_find_key(ca_key); | 
|---|
| 409 |  | -		if (likely(ca && try_module_get(ca->owner))) {  | 
|---|
 | 417 | +		if (likely(ca && bpf_try_module_get(ca, ca->owner))) {  | 
|---|
| 410 | 418 |  			icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); | 
|---|
| 411 | 419 |  			icsk->icsk_ca_ops = ca; | 
|---|
| 412 | 420 |  			ca_got_dst = true; | 
|---|
| .. | .. | 
|---|
| 417 | 425 |  	/* If no valid choice made yet, assign current system default ca. */ | 
|---|
| 418 | 426 |  	if (!ca_got_dst && | 
|---|
| 419 | 427 |  	    (!icsk->icsk_ca_setsockopt || | 
|---|
| 420 |  | -	     !try_module_get(icsk->icsk_ca_ops->owner)))  | 
|---|
 | 428 | +	     !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner)))  | 
|---|
| 421 | 429 |  		tcp_assign_congestion_control(sk); | 
|---|
| 422 | 430 |   | 
|---|
| 423 | 431 |  	tcp_set_ca_state(sk, TCP_CA_Open); | 
|---|
| .. | .. | 
|---|
| 474 | 482 |  	WRITE_ONCE(newtp->rcv_nxt, seq); | 
|---|
| 475 | 483 |  	newtp->segs_in = 1; | 
|---|
| 476 | 484 |   | 
|---|
| 477 |  | -	newtp->snd_sml = newtp->snd_una =  | 
|---|
| 478 |  | -	newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;  | 
|---|
 | 485 | +	seq = treq->snt_isn + 1;  | 
|---|
 | 486 | +	newtp->snd_sml = newtp->snd_una = seq;  | 
|---|
 | 487 | +	WRITE_ONCE(newtp->snd_nxt, seq);  | 
|---|
 | 488 | +	newtp->snd_up = seq;  | 
|---|
| 479 | 489 |   | 
|---|
| 480 | 490 |  	INIT_LIST_HEAD(&newtp->tsq_node); | 
|---|
| 481 | 491 |  	INIT_LIST_HEAD(&newtp->tsorted_sent_queue); | 
|---|
| 482 | 492 |   | 
|---|
| 483 | 493 |  	tcp_init_wl(newtp, treq->rcv_isn); | 
|---|
| 484 | 494 |   | 
|---|
| 485 |  | -	newtp->srtt_us = 0;  | 
|---|
| 486 |  | -	newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);  | 
|---|
| 487 | 495 |  	minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); | 
|---|
| 488 |  | -	newicsk->icsk_rto = TCP_TIMEOUT_INIT;  | 
|---|
| 489 | 496 |  	newicsk->icsk_ack.lrcvtime = tcp_jiffies32; | 
|---|
| 490 | 497 |   | 
|---|
| 491 |  | -	newtp->packets_out = 0;  | 
|---|
| 492 |  | -	newtp->retrans_out = 0;  | 
|---|
| 493 |  | -	newtp->sacked_out = 0;  | 
|---|
| 494 |  | -	newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;  | 
|---|
| 495 |  | -	newtp->tlp_high_seq = 0;  | 
|---|
| 496 | 498 |  	newtp->lsndtime = tcp_jiffies32; | 
|---|
| 497 | 499 |  	newsk->sk_txhash = treq->txhash; | 
|---|
| 498 |  | -	newtp->last_oow_ack_time = 0;  | 
|---|
| 499 | 500 |  	newtp->total_retrans = req->num_retrans; | 
|---|
| 500 |  | -  | 
|---|
| 501 |  | -	/* So many TCP implementations out there (incorrectly) count the  | 
|---|
| 502 |  | -	 * initial SYN frame in their delayed-ACK and congestion control  | 
|---|
| 503 |  | -	 * algorithms that we must have the following bandaid to talk  | 
|---|
| 504 |  | -	 * efficiently to them.  -DaveM  | 
|---|
| 505 |  | -	 */  | 
|---|
| 506 |  | -	newtp->snd_cwnd = TCP_INIT_CWND;  | 
|---|
| 507 |  | -	newtp->snd_cwnd_cnt = 0;  | 
|---|
| 508 |  | -  | 
|---|
| 509 |  | -	/* There's a bubble in the pipe until at least the first ACK. */  | 
|---|
| 510 |  | -	newtp->app_limited = ~0U;  | 
|---|
| 511 | 501 |   | 
|---|
| 512 | 502 |  	tcp_init_xmit_timers(newsk); | 
|---|
| 513 | 503 |  	WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); | 
|---|
| 514 |  | -  | 
|---|
| 515 |  | -	newtp->rx_opt.saw_tstamp = 0;  | 
|---|
| 516 |  | -  | 
|---|
| 517 |  | -	newtp->rx_opt.dsack = 0;  | 
|---|
| 518 |  | -	newtp->rx_opt.num_sacks = 0;  | 
|---|
| 519 |  | -  | 
|---|
| 520 |  | -	newtp->urg_data = 0;  | 
|---|
| 521 | 504 |   | 
|---|
| 522 | 505 |  	if (sock_flag(newsk, SOCK_KEEPOPEN)) | 
|---|
| 523 | 506 |  		inet_csk_reset_keepalive_timer(newsk, | 
|---|
| .. | .. | 
|---|
| 540 | 523 |  	newtp->max_window = newtp->snd_wnd; | 
|---|
| 541 | 524 |   | 
|---|
| 542 | 525 |  	if (newtp->rx_opt.tstamp_ok) { | 
|---|
| 543 |  | -		newtp->rx_opt.ts_recent = req->ts_recent;  | 
|---|
 | 526 | +		newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);  | 
|---|
| 544 | 527 |  		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); | 
|---|
| 545 | 528 |  		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; | 
|---|
| 546 | 529 |  	} else { | 
|---|
| 547 | 530 |  		newtp->rx_opt.ts_recent_stamp = 0; | 
|---|
| 548 | 531 |  		newtp->tcp_header_len = sizeof(struct tcphdr); | 
|---|
| 549 | 532 |  	} | 
|---|
 | 533 | +	if (req->num_timeout) {  | 
|---|
 | 534 | +		newtp->undo_marker = treq->snt_isn;  | 
|---|
 | 535 | +		newtp->retrans_stamp = div_u64(treq->snt_synack,  | 
|---|
 | 536 | +					       USEC_PER_SEC / TCP_TS_HZ);  | 
|---|
 | 537 | +	}  | 
|---|
| 550 | 538 |  	newtp->tsoffset = treq->ts_off; | 
|---|
| 551 | 539 |  #ifdef CONFIG_TCP_MD5SIG | 
|---|
| 552 | 540 |  	newtp->md5sig_info = NULL;	/*XXX*/ | 
|---|
| 553 |  | -	if (newtp->af_specific->md5_lookup(sk, newsk))  | 
|---|
 | 541 | +	if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))  | 
|---|
| 554 | 542 |  		newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; | 
|---|
| 555 | 543 |  #endif | 
|---|
| 556 | 544 |  	if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) | 
|---|
| .. | .. | 
|---|
| 558 | 546 |  	newtp->rx_opt.mss_clamp = req->mss; | 
|---|
| 559 | 547 |  	tcp_ecn_openreq_child(newtp, req); | 
|---|
| 560 | 548 |  	newtp->fastopen_req = NULL; | 
|---|
| 561 |  | -	newtp->fastopen_rsk = NULL;  | 
|---|
| 562 |  | -	newtp->syn_data_acked = 0;  | 
|---|
| 563 |  | -	newtp->rack.mstamp = 0;  | 
|---|
| 564 |  | -	newtp->rack.advanced = 0;  | 
|---|
| 565 |  | -	newtp->rack.reo_wnd_steps = 1;  | 
|---|
| 566 |  | -	newtp->rack.last_delivered = 0;  | 
|---|
| 567 |  | -	newtp->rack.reo_wnd_persist = 0;  | 
|---|
| 568 |  | -	newtp->rack.dsack_seen = 0;  | 
|---|
 | 549 | +	RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);  | 
|---|
 | 550 | +  | 
|---|
 | 551 | +	tcp_bpf_clone(sk, newsk);  | 
|---|
| 569 | 552 |   | 
|---|
| 570 | 553 |  	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); | 
|---|
| 571 | 554 |   | 
|---|
| .. | .. | 
|---|
| 582 | 565 |   * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? | 
|---|
| 583 | 566 |   * | 
|---|
| 584 | 567 |   * We don't need to initialize tmp_opt.sack_ok as we don't use the results | 
|---|
 | 568 | + *  | 
|---|
 | 569 | + * Note: If @fastopen is true, this can be called from process context.  | 
|---|
 | 570 | + *       Otherwise, this is from BH context.  | 
|---|
| 585 | 571 |   */ | 
|---|
| 586 | 572 |   | 
|---|
| 587 | 573 |  struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | 
|---|
| .. | .. | 
|---|
| 600 | 586 |  		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); | 
|---|
| 601 | 587 |   | 
|---|
| 602 | 588 |  		if (tmp_opt.saw_tstamp) { | 
|---|
| 603 |  | -			tmp_opt.ts_recent = req->ts_recent;  | 
|---|
 | 589 | +			tmp_opt.ts_recent = READ_ONCE(req->ts_recent);  | 
|---|
| 604 | 590 |  			if (tmp_opt.rcv_tsecr) | 
|---|
| 605 | 591 |  				tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; | 
|---|
| 606 | 592 |  			/* We do not store true stamp, but it is not required, | 
|---|
| .. | .. | 
|---|
| 734 | 720 |  					  &tcp_rsk(req)->last_oow_ack_time)) | 
|---|
| 735 | 721 |  			req->rsk_ops->send_ack(sk, skb, req); | 
|---|
| 736 | 722 |  		if (paws_reject) | 
|---|
| 737 |  | -			__NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);  | 
|---|
 | 723 | +			NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);  | 
|---|
| 738 | 724 |  		return NULL; | 
|---|
| 739 | 725 |  	} | 
|---|
| 740 | 726 |   | 
|---|
| 741 | 727 |  	/* In sequence, PAWS is OK. */ | 
|---|
| 742 | 728 |   | 
|---|
 | 729 | +	/* TODO: We probably should defer ts_recent change once  | 
|---|
 | 730 | +	 * we take ownership of @req.  | 
|---|
 | 731 | +	 */  | 
|---|
| 743 | 732 |  	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) | 
|---|
| 744 |  | -		req->ts_recent = tmp_opt.rcv_tsval;  | 
|---|
 | 733 | +		WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);  | 
|---|
| 745 | 734 |   | 
|---|
| 746 | 735 |  	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { | 
|---|
| 747 | 736 |  		/* Truncate SYN, it is out of window starting | 
|---|
| .. | .. | 
|---|
| 753 | 742 |  	 *	   "fourth, check the SYN bit" | 
|---|
| 754 | 743 |  	 */ | 
|---|
| 755 | 744 |  	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { | 
|---|
| 756 |  | -		__TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);  | 
|---|
 | 745 | +		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);  | 
|---|
| 757 | 746 |  		goto embryonic_reset; | 
|---|
| 758 | 747 |  	} | 
|---|
| 759 | 748 |   | 
|---|
| .. | .. | 
|---|
| 790 | 779 |  							 req, &own_req); | 
|---|
| 791 | 780 |  	if (!child) | 
|---|
| 792 | 781 |  		goto listen_overflow; | 
|---|
 | 782 | +  | 
|---|
 | 783 | +	if (own_req && rsk_drop_req(req)) {  | 
|---|
 | 784 | +		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);  | 
|---|
 | 785 | +		inet_csk_reqsk_queue_drop_and_put(sk, req);  | 
|---|
 | 786 | +		return child;  | 
|---|
 | 787 | +	}  | 
|---|
| 793 | 788 |   | 
|---|
| 794 | 789 |  	sock_rps_save_rxhash(child, skb); | 
|---|
| 795 | 790 |  	tcp_synack_rtt_meas(child, req); | 
|---|
| .. | .. | 
|---|
| 839 | 834 |   | 
|---|
| 840 | 835 |  int tcp_child_process(struct sock *parent, struct sock *child, | 
|---|
| 841 | 836 |  		      struct sk_buff *skb) | 
|---|
 | 837 | +	__releases(&((child)->sk_lock.slock))  | 
|---|
| 842 | 838 |  { | 
|---|
| 843 | 839 |  	int ret = 0; | 
|---|
| 844 | 840 |  	int state = child->sk_state; | 
|---|