hc
2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/net/ipv4/ip_fragment.c
....@@ -82,15 +82,13 @@
8282 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
8383 {
8484 struct ipq *qp = container_of(q, struct ipq, q);
85
- struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
86
- frags);
87
- struct net *net = container_of(ipv4, struct net, ipv4);
85
+ struct net *net = q->fqdir->net;
8886
8987 const struct frag_v4_compare_key *key = a;
9088
9189 q->key.v4 = *key;
9290 qp->ecn = 0;
93
- qp->peer = q->net->max_dist ?
91
+ qp->peer = q->fqdir->max_dist ?
9492 inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
9593 NULL;
9694 }
....@@ -142,9 +140,14 @@
142140 int err;
143141
144142 qp = container_of(frag, struct ipq, q);
145
- net = container_of(qp->q.net, struct net, ipv4.frags);
143
+ net = qp->q.fqdir->net;
146144
147145 rcu_read_lock();
146
+
147
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
148
+ if (READ_ONCE(qp->q.fqdir->dead))
149
+ goto out_rcu_unlock;
150
+
148151 spin_lock(&qp->q.lock);
149152
150153 if (qp->q.flags & INET_FRAG_COMPLETE)
....@@ -191,8 +194,7 @@
191194 spin_unlock(&qp->q.lock);
192195 out_rcu_unlock:
193196 rcu_read_unlock();
194
- if (head)
195
- kfree_skb(head);
197
+ kfree_skb(head);
196198 ipq_put(qp);
197199 }
198200
....@@ -212,7 +214,7 @@
212214 };
213215 struct inet_frag_queue *q;
214216
215
- q = inet_frag_find(&net->ipv4.frags, &key);
217
+ q = inet_frag_find(net->ipv4.fqdir, &key);
216218 if (!q)
217219 return NULL;
218220
....@@ -223,7 +225,7 @@
223225 static int ip_frag_too_far(struct ipq *qp)
224226 {
225227 struct inet_peer *peer = qp->peer;
226
- unsigned int max = qp->q.net->max_dist;
228
+ unsigned int max = qp->q.fqdir->max_dist;
227229 unsigned int start, end;
228230
229231 int rc;
....@@ -237,12 +239,8 @@
237239
238240 rc = qp->q.fragments_tail && (end - start) > max;
239241
240
- if (rc) {
241
- struct net *net;
242
-
243
- net = container_of(qp->q.net, struct net, ipv4.frags);
244
- __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
245
- }
242
+ if (rc)
243
+ __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
246244
247245 return rc;
248246 }
....@@ -251,18 +249,17 @@
251249 {
252250 unsigned int sum_truesize = 0;
253251
254
- if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
252
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
255253 refcount_inc(&qp->q.refcnt);
256254 return -ETIMEDOUT;
257255 }
258256
259257 sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
260
- sub_frag_mem_limit(qp->q.net, sum_truesize);
258
+ sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
261259
262260 qp->q.flags = 0;
263261 qp->q.len = 0;
264262 qp->q.meat = 0;
265
- qp->q.fragments = NULL;
266263 qp->q.rb_fragments = RB_ROOT;
267264 qp->q.fragments_tail = NULL;
268265 qp->q.last_run_head = NULL;
....@@ -275,7 +272,7 @@
275272 /* Add new segment to existing queue. */
276273 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
277274 {
278
- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
275
+ struct net *net = qp->q.fqdir->net;
279276 int ihl, end, flags, offset;
280277 struct sk_buff *prev_tail;
281278 struct net_device *dev;
....@@ -354,7 +351,7 @@
354351 qp->q.stamp = skb->tstamp;
355352 qp->q.meat += skb->len;
356353 qp->ecn |= ecn;
357
- add_frag_mem_limit(qp->q.net, skb->truesize);
354
+ add_frag_mem_limit(qp->q.fqdir, skb->truesize);
358355 if (offset == 0)
359356 qp->q.flags |= INET_FRAG_FIRST_IN;
360357
....@@ -397,11 +394,16 @@
397394 return err;
398395 }
399396
397
+static bool ip_frag_coalesce_ok(const struct ipq *qp)
398
+{
399
+ return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
400
+}
401
+
400402 /* Build a new IP datagram from all its fragments. */
401403 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
402404 struct sk_buff *prev_tail, struct net_device *dev)
403405 {
404
- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
406
+ struct net *net = qp->q.fqdir->net;
405407 struct iphdr *iph;
406408 void *reasm_data;
407409 int len, err;
....@@ -425,7 +427,8 @@
425427 if (len > 65535)
426428 goto out_oversize;
427429
428
- inet_frag_reasm_finish(&qp->q, skb, reasm_data);
430
+ inet_frag_reasm_finish(&qp->q, skb, reasm_data,
431
+ ip_frag_coalesce_ok(qp));
429432
430433 skb->dev = dev;
431434 IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
....@@ -452,7 +455,6 @@
452455 ip_send_check(iph);
453456
454457 __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
455
- qp->q.fragments = NULL;
456458 qp->q.rb_fragments = RB_ROOT;
457459 qp->q.fragments_tail = NULL;
458460 qp->q.last_run_head = NULL;
....@@ -547,30 +549,24 @@
547549 static struct ctl_table ip4_frags_ns_ctl_table[] = {
548550 {
549551 .procname = "ipfrag_high_thresh",
550
- .data = &init_net.ipv4.frags.high_thresh,
551552 .maxlen = sizeof(unsigned long),
552553 .mode = 0644,
553554 .proc_handler = proc_doulongvec_minmax,
554
- .extra1 = &init_net.ipv4.frags.low_thresh
555555 },
556556 {
557557 .procname = "ipfrag_low_thresh",
558
- .data = &init_net.ipv4.frags.low_thresh,
559558 .maxlen = sizeof(unsigned long),
560559 .mode = 0644,
561560 .proc_handler = proc_doulongvec_minmax,
562
- .extra2 = &init_net.ipv4.frags.high_thresh
563561 },
564562 {
565563 .procname = "ipfrag_time",
566
- .data = &init_net.ipv4.frags.timeout,
567564 .maxlen = sizeof(int),
568565 .mode = 0644,
569566 .proc_handler = proc_dointvec_jiffies,
570567 },
571568 {
572569 .procname = "ipfrag_max_dist",
573
- .data = &init_net.ipv4.frags.max_dist,
574570 .maxlen = sizeof(int),
575571 .mode = 0644,
576572 .proc_handler = proc_dointvec_minmax,
....@@ -603,14 +599,13 @@
603599 if (!table)
604600 goto err_alloc;
605601
606
- table[0].data = &net->ipv4.frags.high_thresh;
607
- table[0].extra1 = &net->ipv4.frags.low_thresh;
608
- table[0].extra2 = &init_net.ipv4.frags.high_thresh;
609
- table[1].data = &net->ipv4.frags.low_thresh;
610
- table[1].extra2 = &net->ipv4.frags.high_thresh;
611
- table[2].data = &net->ipv4.frags.timeout;
612
- table[3].data = &net->ipv4.frags.max_dist;
613602 }
603
+ table[0].data = &net->ipv4.fqdir->high_thresh;
604
+ table[0].extra1 = &net->ipv4.fqdir->low_thresh;
605
+ table[1].data = &net->ipv4.fqdir->low_thresh;
606
+ table[1].extra2 = &net->ipv4.fqdir->high_thresh;
607
+ table[2].data = &net->ipv4.fqdir->timeout;
608
+ table[3].data = &net->ipv4.fqdir->max_dist;
614609
615610 hdr = register_net_sysctl(net, "net/ipv4", table);
616611 if (!hdr)
....@@ -658,6 +653,9 @@
658653 {
659654 int res;
660655
656
+ res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
657
+ if (res < 0)
658
+ return res;
661659 /* Fragment cache limits.
662660 *
663661 * The fragment memory accounting code, (tries to) account for
....@@ -672,36 +670,38 @@
672670 * we will prune down to 3MB, making room for approx 8 big 64K
673671 * fragments 8x128k.
674672 */
675
- net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
676
- net->ipv4.frags.low_thresh = 3 * 1024 * 1024;
673
+ net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
674
+ net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024;
677675 /*
678676 * Important NOTE! Fragment queue must be destroyed before MSL expires.
679677 * RFC791 is wrong proposing to prolongate timer each fragment arrival
680678 * by TTL.
681679 */
682
- net->ipv4.frags.timeout = IP_FRAG_TIME;
680
+ net->ipv4.fqdir->timeout = IP_FRAG_TIME;
683681
684
- net->ipv4.frags.max_dist = 64;
685
- net->ipv4.frags.f = &ip4_frags;
682
+ net->ipv4.fqdir->max_dist = 64;
686683
687
- res = inet_frags_init_net(&net->ipv4.frags);
688
- if (res < 0)
689
- return res;
690684 res = ip4_frags_ns_ctl_register(net);
691685 if (res < 0)
692
- inet_frags_exit_net(&net->ipv4.frags);
686
+ fqdir_exit(net->ipv4.fqdir);
693687 return res;
688
+}
689
+
690
+static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
691
+{
692
+ fqdir_pre_exit(net->ipv4.fqdir);
694693 }
695694
696695 static void __net_exit ipv4_frags_exit_net(struct net *net)
697696 {
698697 ip4_frags_ns_ctl_unregister(net);
699
- inet_frags_exit_net(&net->ipv4.frags);
698
+ fqdir_exit(net->ipv4.fqdir);
700699 }
701700
702701 static struct pernet_operations ip4_frags_ops = {
703
- .init = ipv4_frags_init_net,
704
- .exit = ipv4_frags_exit_net,
702
+ .init = ipv4_frags_init_net,
703
+ .pre_exit = ipv4_frags_pre_exit_net,
704
+ .exit = ipv4_frags_exit_net,
705705 };
706706
707707