2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/block/blk-mq.c
@@ -43,7 +43,7 @@
 
 #include <trace/hooks/block.h>
 
-static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
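Note: this hunk moves the per-CPU completion list from the lockless llist API back to a plain list_head, which has to be protected by masking local interrupts instead. A minimal standalone sketch of the two producer-side idioms; the done_item/done_llist/done_list/queue_* names are made up for illustration, this is not blk-mq code:

    #include <linux/llist.h>
    #include <linux/list.h>
    #include <linux/percpu.h>
    #include <linux/interrupt.h>
    #include <linux/irqflags.h>

    struct done_item {
    	struct llist_node lnode;	/* used by the llist variant */
    	struct list_head node;		/* used by the list_head variant */
    };

    static DEFINE_PER_CPU(struct llist_head, done_llist);
    static DEFINE_PER_CPU(struct list_head, done_list);

    /* llist: lock-free, usable without masking interrupts */
    static void queue_done_llist(struct done_item *it)
    {
    	preempt_disable();
    	/* llist_add() returns true if the list was empty before the add */
    	if (llist_add(&it->lnode, this_cpu_ptr(&done_llist)))
    		raise_softirq(BLOCK_SOFTIRQ);
    	preempt_enable();
    }

    /* list_head: local irqs must be off so an irq on this CPU cannot race */
    static void queue_done_list(struct done_item *it)
    {
    	struct list_head *list;
    	unsigned long flags;

    	local_irq_save(flags);
    	list = this_cpu_ptr(&done_list);
    	list_add_tail(&it->node, list);
    	/* only the first entry on an empty list needs to raise the softirq */
    	if (list->next == &it->node)
    		raise_softirq_irqoff(BLOCK_SOFTIRQ);
    	local_irq_restore(flags);
    }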
@@ -451,7 +451,8 @@
 	 * allocator for this for the rare use case of a command tied to
 	 * a specific queue.
 	 */
-	if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
+	if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)) ||
+	    WARN_ON_ONCE(!(flags & BLK_MQ_REQ_RESERVED)))
 		return ERR_PTR(-EINVAL);
 
 	if (hctx_idx >= q->nr_hw_queues)
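Note: the two checks are not equivalent. The combined form being removed warns only when neither BLK_MQ_REQ_NOWAIT nor BLK_MQ_REQ_RESERVED is set; the split form restored here warns unless both are set. A small illustrative comparison (the helper names are made up):

    #include <linux/blk-mq.h>

    /* rejects only requests that carry neither flag */
    static bool rejected_combined(blk_mq_req_flags_t flags)
    {
    	return !(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED));
    }

    /* rejects requests unless both flags are present */
    static bool rejected_split(blk_mq_req_flags_t flags)
    {
    	return !(flags & BLK_MQ_REQ_NOWAIT) ||
    	       !(flags & BLK_MQ_REQ_RESERVED);
    }

    /* e.g. flags == BLK_MQ_REQ_NOWAIT: rejected_combined() is false,
     * rejected_split() is true. */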
@@ -571,29 +572,80 @@
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
-static void blk_complete_reqs(struct llist_head *list)
-{
-	struct llist_node *entry = llist_reverse_order(llist_del_all(list));
-	struct request *rq, *next;
-
-	llist_for_each_entry_safe(rq, next, entry, ipi_list)
-		rq->q->mq_ops->complete(rq);
-}
-
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
 static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
-	blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = this_cpu_ptr(&blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, ipi_list);
+		list_del_init(&rq->ipi_list);
+		rq->q->mq_ops->complete(rq);
+	}
+}
+
+static void blk_mq_trigger_softirq(struct request *rq)
+{
+	struct list_head *list;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	list = this_cpu_ptr(&blk_cpu_done);
+	list_add_tail(&rq->ipi_list, list);
+
+	/*
+	 * If the list only contains our just added request, signal a raise of
+	 * the softirq. If there are already entries there, someone already
+	 * raised the irq but it hasn't run yet.
+	 */
+	if (list->next == &rq->ipi_list)
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	local_irq_restore(flags);
 }
 
 static int blk_softirq_cpu_dead(unsigned int cpu)
 {
-	blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	local_irq_disable();
+	list_splice_init(&per_cpu(blk_cpu_done, cpu),
+			 this_cpu_ptr(&blk_cpu_done));
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	local_irq_enable();
+
 	return 0;
 }
 
+
 static void __blk_mq_complete_request_remote(void *data)
 {
-	__raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	struct request *rq = data;
+
+	/*
+	 * For most of single queue controllers, there is only one irq vector
+	 * for handling I/O completion, and the only irq's affinity is set
+	 * to all possible CPUs. On most of ARCHs, this affinity means the irq
+	 * is handled on one specific CPU.
+	 *
+	 * So complete I/O requests in softirq context in case of single queue
+	 * devices to avoid degrading I/O performance due to irqsoff latency.
+	 */
+	if (rq->q->nr_hw_queues == 1)
+		blk_mq_trigger_softirq(rq);
+	else
+		rq->q->mq_ops->complete(rq);
 }
 
 static inline bool blk_mq_complete_need_ipi(struct request *rq)
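Note: the consumer side changes shape accordingly. The llist-based blk_complete_reqs() being removed detaches the whole per-CPU list atomically and has to reverse it, since llist_add() builds the list in LIFO order, while the restored blk_done_softirq() splices the list_head onto a stack-local list under local_irq_disable() and then walks it in FIFO order without any further locking. A reduced sketch of the two drain loops, reusing the illustrative struct done_item from the earlier sketch:

    /* handle_one() stands in for rq->q->mq_ops->complete(rq) */
    static void handle_one(struct done_item *it);

    /* llist drain: one atomic detach, then restore submission order */
    static void drain_llist(struct llist_head *head)
    {
    	struct llist_node *entry = llist_reverse_order(llist_del_all(head));
    	struct done_item *it, *next;

    	llist_for_each_entry_safe(it, next, entry, lnode)
    		handle_one(it);
    }

    /* list_head drain: detach under irqs off, then walk lock-free */
    static void drain_list(struct list_head *head)
    {
    	LIST_HEAD(local);

    	local_irq_disable();
    	list_replace_init(head, &local);
    	local_irq_enable();

    	while (!list_empty(&local)) {
    		struct done_item *it =
    			list_first_entry(&local, struct done_item, node);

    		list_del_init(&it->node);
    		handle_one(it);
    	}
    }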
@@ -602,14 +654,6 @@
 
 	if (!IS_ENABLED(CONFIG_SMP) ||
 	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
-		return false;
-	/*
-	 * With force threaded interrupts enabled, raising softirq from an SMP
-	 * function call will always result in waking the ksoftirqd thread.
-	 * This is probably worse than completing the request on a different
-	 * cache domain.
-	 */
-	if (force_irqthreads)
 		return false;
 
 	/* same CPU or cache domain? Complete locally */
@@ -620,32 +664,6 @@
 
 	/* don't try to IPI to an offline CPU */
 	return cpu_online(rq->mq_ctx->cpu);
-}
-
-static void blk_mq_complete_send_ipi(struct request *rq)
-{
-	struct llist_head *list;
-	unsigned int cpu;
-
-	cpu = rq->mq_ctx->cpu;
-	list = &per_cpu(blk_cpu_done, cpu);
-	if (llist_add(&rq->ipi_list, list)) {
-		rq->csd.func = __blk_mq_complete_request_remote;
-		rq->csd.info = rq;
-		rq->csd.flags = 0;
-		smp_call_function_single_async(cpu, &rq->csd);
-	}
-}
-
-static void blk_mq_raise_softirq(struct request *rq)
-{
-	struct llist_head *list;
-
-	preempt_disable();
-	list = this_cpu_ptr(&blk_cpu_done);
-	if (llist_add(&rq->ipi_list, list))
-		raise_softirq(BLOCK_SOFTIRQ);
-	preempt_enable();
 }
 
 bool blk_mq_complete_request_remote(struct request *rq)
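Note: blk_mq_complete_send_ipi(), removed above, wrapped the generic asynchronous cross-CPU call; the next hunk open codes the same csd setup at its only call site. A minimal sketch of that pattern, assuming the v5.9-era call_single_data_t layout with .func/.info/.flags; the my_* names are illustrative:

    #include <linux/smp.h>

    struct my_work {
    	call_single_data_t csd;
    	int payload;
    };

    /* runs in IPI (hard irq) context on the target CPU */
    static void my_remote_fn(void *info)
    {
    	struct my_work *w = info;

    	(void)w->payload;
    }

    static void my_kick_remote(struct my_work *w, int cpu)
    {
    	w->csd.func = my_remote_fn;
    	w->csd.info = w;
    	w->csd.flags = 0;
    	/* queues the callback and returns without waiting for it to run */
    	smp_call_function_single_async(cpu, &w->csd);
    }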
@@ -660,15 +678,17 @@
 		return false;
 
 	if (blk_mq_complete_need_ipi(rq)) {
-		blk_mq_complete_send_ipi(rq);
-		return true;
+		rq->csd.func = __blk_mq_complete_request_remote;
+		rq->csd.info = rq;
+		rq->csd.flags = 0;
+		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
+	} else {
+		if (rq->q->nr_hw_queues > 1)
+			return false;
+		blk_mq_trigger_softirq(rq);
 	}
 
-	if (rq->q->nr_hw_queues == 1) {
-		blk_mq_raise_softirq(rq);
-		return true;
-	}
-	return false;
+	return true;
 }
 EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
 
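Note: the contract of blk_mq_complete_request_remote() is unchanged by this rewrite: it returns true when the completion has been handed off to an IPI or to the block softirq, and false when the caller should complete the request in place. The usual caller in this part of blk-mq is blk_mq_complete_request(); a sketch of it, reproduced from memory and not part of this diff:

    /**
     * blk_mq_complete_request - end I/O on a request
     * @rq: the request being processed
     */
    void blk_mq_complete_request(struct request *rq)
    {
    	if (!blk_mq_complete_request_remote(rq))
    		rq->q->mq_ops->complete(rq);
    }
    EXPORT_SYMBOL(blk_mq_complete_request);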
@@ -1577,14 +1597,14 @@
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-		int cpu = get_cpu_light();
+		int cpu = get_cpu();
 		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
 			__blk_mq_run_hw_queue(hctx);
-			put_cpu_light();
+			put_cpu();
 			return;
 		}
 
-		put_cpu_light();
+		put_cpu();
 	}
 
 	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
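Note: get_cpu_light()/put_cpu_light() are PREEMPT_RT additions that avoid disabling preemption (they pin the task to the CPU via migrate_disable(), if memory serves); this hunk restores the mainline get_cpu()/put_cpu() pair, which disables preemption outright. The general idiom, illustrative and not blk-mq code:

    #include <linux/smp.h>

    static void do_per_cpu_work(void)
    {
    	int cpu = get_cpu();	/* disables preemption, returns this CPU's id */

    	/*
    	 * Work that must stay on 'cpu', such as testing a cpumask or
    	 * touching per-CPU data, goes here.
    	 */

    	put_cpu();		/* re-enables preemption */
    }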
@@ -2228,7 +2248,7 @@
 
 	blk_mq_bio_to_request(rq, bio, nr_segs);
 
-	ret = blk_crypto_init_request(rq);
+	ret = blk_crypto_rq_get_keyslot(rq);
 	if (ret != BLK_STS_OK) {
 		bio->bi_status = ret;
 		bio_endio(bio);
@@ -4019,7 +4039,7 @@
 	int i;
 
 	for_each_possible_cpu(i)
-		init_llist_head(&per_cpu(blk_cpu_done, i));
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
 	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
 
 	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
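Note: the call at the end of this hunk registers blk_softirq_cpu_dead() as a CPU-hotplug teardown callback, so a dying CPU's pending completions get spliced onto a live CPU (see blk_softirq_cpu_dead() earlier in the patch); its remaining arguments are simply cut off by the hunk boundary and are left as-is. The general registration pattern looks like the sketch below, with an illustrative state constant, name, and callbacks rather than the elided arguments:

    #include <linux/init.h>
    #include <linux/cpuhotplug.h>

    /* teardown callback: runs after 'cpu' has gone offline */
    static int my_cpu_dead(unsigned int cpu)
    {
    	/* migrate any per-CPU work queued on the dead CPU, then return 0 */
    	return 0;
    }

    static int __init my_subsys_init(void)
    {
    	int ret;

    	/* _nocalls: only future hotplug events invoke the callbacks;
    	 * startup is NULL, my_cpu_dead() runs when a CPU dies. */
    	ret = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
    					"example/subsys:dead", NULL,
    					my_cpu_dead);
    	return ret < 0 ? ret : 0;
    }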