.. | .. |
---|
82 | 82 | static void ip4_frag_init(struct inet_frag_queue *q, const void *a) |
---|
83 | 83 | { |
---|
84 | 84 | struct ipq *qp = container_of(q, struct ipq, q); |
---|
85 | | - struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, |
---|
86 | | - frags); |
---|
87 | | - struct net *net = container_of(ipv4, struct net, ipv4); |
---|
| 85 | + struct net *net = q->fqdir->net; |
---|
88 | 86 | |
---|
89 | 87 | const struct frag_v4_compare_key *key = a; |
---|
90 | 88 | |
---|
91 | 89 | q->key.v4 = *key; |
---|
92 | 90 | qp->ecn = 0; |
---|
93 | | - qp->peer = q->net->max_dist ? |
---|
| 91 | + qp->peer = q->fqdir->max_dist ? |
---|
94 | 92 | inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : |
---|
95 | 93 | NULL; |
---|
96 | 94 | } |
---|
.. | .. |
---|
142 | 140 | int err; |
---|
143 | 141 | |
---|
144 | 142 | qp = container_of(frag, struct ipq, q); |
---|
145 | | - net = container_of(qp->q.net, struct net, ipv4.frags); |
---|
| 143 | + net = qp->q.fqdir->net; |
---|
146 | 144 | |
---|
147 | 145 | rcu_read_lock(); |
---|
| 146 | + |
---|
| 147 | + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ |
---|
| 148 | + if (READ_ONCE(qp->q.fqdir->dead)) |
---|
| 149 | + goto out_rcu_unlock; |
---|
| 150 | + |
---|
148 | 151 | spin_lock(&qp->q.lock); |
---|
149 | 152 | |
---|
150 | 153 | if (qp->q.flags & INET_FRAG_COMPLETE) |
---|
.. | .. |
---|
191 | 194 | spin_unlock(&qp->q.lock); |
---|
192 | 195 | out_rcu_unlock: |
---|
193 | 196 | rcu_read_unlock(); |
---|
194 | | - if (head) |
---|
195 | | - kfree_skb(head); |
---|
| 197 | + kfree_skb(head); |
---|
196 | 198 | ipq_put(qp); |
---|
197 | 199 | } |
---|
198 | 200 | |
---|
.. | .. |
---|
212 | 214 | }; |
---|
213 | 215 | struct inet_frag_queue *q; |
---|
214 | 216 | |
---|
215 | | - q = inet_frag_find(&net->ipv4.frags, &key); |
---|
| 217 | + q = inet_frag_find(net->ipv4.fqdir, &key); |
---|
216 | 218 | if (!q) |
---|
217 | 219 | return NULL; |
---|
218 | 220 | |
---|
.. | .. |
---|
223 | 225 | static int ip_frag_too_far(struct ipq *qp) |
---|
224 | 226 | { |
---|
225 | 227 | struct inet_peer *peer = qp->peer; |
---|
226 | | - unsigned int max = qp->q.net->max_dist; |
---|
| 228 | + unsigned int max = qp->q.fqdir->max_dist; |
---|
227 | 229 | unsigned int start, end; |
---|
228 | 230 | |
---|
229 | 231 | int rc; |
---|
.. | .. |
---|
237 | 239 | |
---|
238 | 240 | rc = qp->q.fragments_tail && (end - start) > max; |
---|
239 | 241 | |
---|
240 | | - if (rc) { |
---|
241 | | - struct net *net; |
---|
242 | | - |
---|
243 | | - net = container_of(qp->q.net, struct net, ipv4.frags); |
---|
244 | | - __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); |
---|
245 | | - } |
---|
| 242 | + if (rc) |
---|
| 243 | + __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS); |
---|
246 | 244 | |
---|
247 | 245 | return rc; |
---|
248 | 246 | } |
---|
.. | .. |
---|
251 | 249 | { |
---|
252 | 250 | unsigned int sum_truesize = 0; |
---|
253 | 251 | |
---|
254 | | - if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { |
---|
| 252 | + if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { |
---|
255 | 253 | refcount_inc(&qp->q.refcnt); |
---|
256 | 254 | return -ETIMEDOUT; |
---|
257 | 255 | } |
---|
258 | 256 | |
---|
259 | 257 | sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); |
---|
260 | | - sub_frag_mem_limit(qp->q.net, sum_truesize); |
---|
| 258 | + sub_frag_mem_limit(qp->q.fqdir, sum_truesize); |
---|
261 | 259 | |
---|
262 | 260 | qp->q.flags = 0; |
---|
263 | 261 | qp->q.len = 0; |
---|
264 | 262 | qp->q.meat = 0; |
---|
265 | | - qp->q.fragments = NULL; |
---|
266 | 263 | qp->q.rb_fragments = RB_ROOT; |
---|
267 | 264 | qp->q.fragments_tail = NULL; |
---|
268 | 265 | qp->q.last_run_head = NULL; |
---|
.. | .. |
---|
275 | 272 | /* Add new segment to existing queue. */ |
---|
276 | 273 | static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) |
---|
277 | 274 | { |
---|
278 | | - struct net *net = container_of(qp->q.net, struct net, ipv4.frags); |
---|
| 275 | + struct net *net = qp->q.fqdir->net; |
---|
279 | 276 | int ihl, end, flags, offset; |
---|
280 | 277 | struct sk_buff *prev_tail; |
---|
281 | 278 | struct net_device *dev; |
---|
.. | .. |
---|
354 | 351 | qp->q.stamp = skb->tstamp; |
---|
355 | 352 | qp->q.meat += skb->len; |
---|
356 | 353 | qp->ecn |= ecn; |
---|
357 | | - add_frag_mem_limit(qp->q.net, skb->truesize); |
---|
| 354 | + add_frag_mem_limit(qp->q.fqdir, skb->truesize); |
---|
358 | 355 | if (offset == 0) |
---|
359 | 356 | qp->q.flags |= INET_FRAG_FIRST_IN; |
---|
360 | 357 | |
---|
.. | .. |
---|
397 | 394 | return err; |
---|
398 | 395 | } |
---|
399 | 396 | |
---|
| 397 | +static bool ip_frag_coalesce_ok(const struct ipq *qp) |
---|
| 398 | +{ |
---|
| 399 | + return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER; |
---|
| 400 | +} |
---|
| 401 | + |
---|
400 | 402 | /* Build a new IP datagram from all its fragments. */ |
---|
401 | 403 | static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, |
---|
402 | 404 | struct sk_buff *prev_tail, struct net_device *dev) |
---|
403 | 405 | { |
---|
404 | | - struct net *net = container_of(qp->q.net, struct net, ipv4.frags); |
---|
| 406 | + struct net *net = qp->q.fqdir->net; |
---|
405 | 407 | struct iphdr *iph; |
---|
406 | 408 | void *reasm_data; |
---|
407 | 409 | int len, err; |
---|
.. | .. |
---|
425 | 427 | if (len > 65535) |
---|
426 | 428 | goto out_oversize; |
---|
427 | 429 | |
---|
428 | | - inet_frag_reasm_finish(&qp->q, skb, reasm_data); |
---|
| 430 | + inet_frag_reasm_finish(&qp->q, skb, reasm_data, |
---|
| 431 | + ip_frag_coalesce_ok(qp)); |
---|
429 | 432 | |
---|
430 | 433 | skb->dev = dev; |
---|
431 | 434 | IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); |
---|
.. | .. |
---|
452 | 455 | ip_send_check(iph); |
---|
453 | 456 | |
---|
454 | 457 | __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); |
---|
455 | | - qp->q.fragments = NULL; |
---|
456 | 458 | qp->q.rb_fragments = RB_ROOT; |
---|
457 | 459 | qp->q.fragments_tail = NULL; |
---|
458 | 460 | qp->q.last_run_head = NULL; |
---|
.. | .. |
---|
547 | 549 | static struct ctl_table ip4_frags_ns_ctl_table[] = { |
---|
548 | 550 | { |
---|
549 | 551 | .procname = "ipfrag_high_thresh", |
---|
550 | | - .data = &init_net.ipv4.frags.high_thresh, |
---|
551 | 552 | .maxlen = sizeof(unsigned long), |
---|
552 | 553 | .mode = 0644, |
---|
553 | 554 | .proc_handler = proc_doulongvec_minmax, |
---|
554 | | - .extra1 = &init_net.ipv4.frags.low_thresh |
---|
555 | 555 | }, |
---|
556 | 556 | { |
---|
557 | 557 | .procname = "ipfrag_low_thresh", |
---|
558 | | - .data = &init_net.ipv4.frags.low_thresh, |
---|
559 | 558 | .maxlen = sizeof(unsigned long), |
---|
560 | 559 | .mode = 0644, |
---|
561 | 560 | .proc_handler = proc_doulongvec_minmax, |
---|
562 | | - .extra2 = &init_net.ipv4.frags.high_thresh |
---|
563 | 561 | }, |
---|
564 | 562 | { |
---|
565 | 563 | .procname = "ipfrag_time", |
---|
566 | | - .data = &init_net.ipv4.frags.timeout, |
---|
567 | 564 | .maxlen = sizeof(int), |
---|
568 | 565 | .mode = 0644, |
---|
569 | 566 | .proc_handler = proc_dointvec_jiffies, |
---|
570 | 567 | }, |
---|
571 | 568 | { |
---|
572 | 569 | .procname = "ipfrag_max_dist", |
---|
573 | | - .data = &init_net.ipv4.frags.max_dist, |
---|
574 | 570 | .maxlen = sizeof(int), |
---|
575 | 571 | .mode = 0644, |
---|
576 | 572 | .proc_handler = proc_dointvec_minmax, |
---|
.. | .. |
---|
603 | 599 | if (!table) |
---|
604 | 600 | goto err_alloc; |
---|
605 | 601 | |
---|
606 | | - table[0].data = &net->ipv4.frags.high_thresh; |
---|
607 | | - table[0].extra1 = &net->ipv4.frags.low_thresh; |
---|
608 | | - table[0].extra2 = &init_net.ipv4.frags.high_thresh; |
---|
609 | | - table[1].data = &net->ipv4.frags.low_thresh; |
---|
610 | | - table[1].extra2 = &net->ipv4.frags.high_thresh; |
---|
611 | | - table[2].data = &net->ipv4.frags.timeout; |
---|
612 | | - table[3].data = &net->ipv4.frags.max_dist; |
---|
613 | 602 | } |
---|
| 603 | + table[0].data = &net->ipv4.fqdir->high_thresh; |
---|
| 604 | + table[0].extra1 = &net->ipv4.fqdir->low_thresh; |
---|
| 605 | + table[1].data = &net->ipv4.fqdir->low_thresh; |
---|
| 606 | + table[1].extra2 = &net->ipv4.fqdir->high_thresh; |
---|
| 607 | + table[2].data = &net->ipv4.fqdir->timeout; |
---|
| 608 | + table[3].data = &net->ipv4.fqdir->max_dist; |
---|
614 | 609 | |
---|
615 | 610 | hdr = register_net_sysctl(net, "net/ipv4", table); |
---|
616 | 611 | if (!hdr) |
---|
.. | .. |
---|
658 | 653 | { |
---|
659 | 654 | int res; |
---|
660 | 655 | |
---|
| 656 | + res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); |
---|
| 657 | + if (res < 0) |
---|
| 658 | + return res; |
---|
661 | 659 | /* Fragment cache limits. |
---|
662 | 660 | * |
---|
663 | 661 | * The fragment memory accounting code, (tries to) account for |
---|
.. | .. |
---|
672 | 670 | * we will prune down to 3MB, making room for approx 8 big 64K |
---|
673 | 671 | * fragments 8x128k. |
---|
674 | 672 | */ |
---|
675 | | - net->ipv4.frags.high_thresh = 4 * 1024 * 1024; |
---|
676 | | - net->ipv4.frags.low_thresh = 3 * 1024 * 1024; |
---|
| 673 | + net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024; |
---|
| 674 | + net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024; |
---|
677 | 675 | /* |
---|
678 | 676 | * Important NOTE! Fragment queue must be destroyed before MSL expires. |
---|
679 | 677 | * RFC791 is wrong proposing to prolongate timer each fragment arrival |
---|
680 | 678 | * by TTL. |
---|
681 | 679 | */ |
---|
682 | | - net->ipv4.frags.timeout = IP_FRAG_TIME; |
---|
| 680 | + net->ipv4.fqdir->timeout = IP_FRAG_TIME; |
---|
683 | 681 | |
---|
684 | | - net->ipv4.frags.max_dist = 64; |
---|
685 | | - net->ipv4.frags.f = &ip4_frags; |
---|
| 682 | + net->ipv4.fqdir->max_dist = 64; |
---|
686 | 683 | |
---|
687 | | - res = inet_frags_init_net(&net->ipv4.frags); |
---|
688 | | - if (res < 0) |
---|
689 | | - return res; |
---|
690 | 684 | res = ip4_frags_ns_ctl_register(net); |
---|
691 | 685 | if (res < 0) |
---|
692 | | - inet_frags_exit_net(&net->ipv4.frags); |
---|
| 686 | + fqdir_exit(net->ipv4.fqdir); |
---|
693 | 687 | return res; |
---|
| 688 | +} |
---|
| 689 | + |
---|
| 690 | +static void __net_exit ipv4_frags_pre_exit_net(struct net *net) |
---|
| 691 | +{ |
---|
| 692 | + fqdir_pre_exit(net->ipv4.fqdir); |
---|
694 | 693 | } |
---|
695 | 694 | |
---|
696 | 695 | static void __net_exit ipv4_frags_exit_net(struct net *net) |
---|
697 | 696 | { |
---|
698 | 697 | ip4_frags_ns_ctl_unregister(net); |
---|
699 | | - inet_frags_exit_net(&net->ipv4.frags); |
---|
| 698 | + fqdir_exit(net->ipv4.fqdir); |
---|
700 | 699 | } |
---|
701 | 700 | |
---|
702 | 701 | static struct pernet_operations ip4_frags_ops = { |
---|
703 | | - .init = ipv4_frags_init_net, |
---|
704 | | - .exit = ipv4_frags_exit_net, |
---|
| 702 | + .init = ipv4_frags_init_net, |
---|
| 703 | + .pre_exit = ipv4_frags_pre_exit_net, |
---|
| 704 | + .exit = ipv4_frags_exit_net, |
---|
705 | 705 | }; |
---|
706 | 706 | |
---|
707 | 707 | |
---|