.. | ..
171 | 171 | static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
172 | 172 | {
173 | 173 |         /* Complain if the scheduler has not started. */
174 |     | -       WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
175 |     | -                 "synchronize_rcu_tasks called too soon");
    | 174 | +       if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
    | 175 | +                     "synchronize_%s() called too soon", rtp->name))
    | 176 | +               return;
176 | 177 |
177 | 178 |         /* Wait for the grace period. */
178 | 179 |         wait_rcu_gp(rtp->call_func);
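The rewritten check leans on the fact that WARN_ONCE() evaluates to the boolean value of its condition, so a single statement can both emit a one-time warning and guard an early return, presumably so that a grace-period wait that cannot make progress before the scheduler is running is skipped rather than attempted. A minimal sketch of the idiom (illustration only, not part of the patch):

```c
#include <linux/bug.h>
#include <linux/types.h>

/* Sketch: WARN_ONCE() returns true exactly when its condition is true,
 * so it can warn once and bail out in one statement. */
static void early_boot_guard_sketch(bool too_early)
{
	if (WARN_ONCE(too_early, "called too soon"))
		return;		/* Skip the wait instead of blocking at boot. */
	/* ...otherwise proceed with the grace-period wait... */
}
```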
.. | ..
416 | 417 | static void rcu_tasks_postscan(struct list_head *hop)
417 | 418 | {
418 | 419 |         /*
419 |     | -        * Wait for tasks that are in the process of exiting. This
420 |     | -        * does only part of the job, ensuring that all tasks that were
421 |     | -        * previously exiting reach the point where they have disabled
422 |     | -        * preemption, allowing the later synchronize_rcu() to finish
423 |     | -        * the job.
    | 420 | +        * Exiting tasks may escape the tasklist scan. Those are vulnerable
    | 421 | +        * until their final schedule() with TASK_DEAD state. To cope with
    | 422 | +        * this, divide the fragile exit path part in two intersecting
    | 423 | +        * read side critical sections:
    | 424 | +        *
    | 425 | +        * 1) An _SRCU_ read side starting before calling exit_notify(),
    | 426 | +        *    which may remove the task from the tasklist, and ending after
    | 427 | +        *    the final preempt_disable() call in do_exit().
    | 428 | +        *
    | 429 | +        * 2) An _RCU_ read side starting with the final preempt_disable()
    | 430 | +        *    call in do_exit() and ending with the final call to schedule()
    | 431 | +        *    with TASK_DEAD state.
    | 432 | +        *
    | 433 | +        * This handles the part 1). And postgp will handle part 2) with a
    | 434 | +        * call to synchronize_rcu().
424 | 435 |          */
425 | 436 |         synchronize_srcu(&tasks_rcu_exit_srcu);
426 | 437 | }
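The two intersecting read-side critical sections are easier to picture against an outline of the exit path that the new comment describes. This is an illustrative sketch only, using the hook names from this file rather than the actual contents of do_exit():

```c
/* Illustrative outline only -- not the real do_exit(). */
static void exiting_task_sketch(void)
{
	exit_tasks_rcu_start();		/* 1) SRCU read side begins before exit_notify(). */

	/* exit_notify() may remove the task from the tasklist here, so a
	 * later tasklist scan can miss it; the SRCU section keeps it covered. */

	preempt_disable();		/* 2) RCU read side begins: final preempt_disable(). */
	exit_tasks_rcu_finish();	/* 1) SRCU read side ends inside the RCU one. */

	/* From here until the final schedule() with TASK_DEAD state the task
	 * is non-preemptible, so postgp's synchronize_rcu() waits for it. */
}
```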
.. | ..
487 | 498 |          *
488 | 499 |          * In addition, this synchronize_rcu() waits for exiting tasks
489 | 500 |          * to complete their final preempt_disable() region of execution,
490 |     | -        * cleaning up after the synchronize_srcu() above.
    | 501 | +        * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu),
    | 502 | +        * enforcing the whole region before tasklist removal until
    | 503 | +        * the final schedule() with TASK_DEAD state to be an RCU TASKS
    | 504 | +        * read side critical section.
491 | 505 |          */
492 | 506 |         synchronize_rcu();
493 | 507 | }
.. | ..
576 | 590 | }
577 | 591 | #endif /* #ifndef CONFIG_TINY_RCU */
578 | 592 |
579 |     | -/* Do the srcu_read_lock() for the above synchronize_srcu(). */
    | 593 | +/*
    | 594 | + * Contribute to protect against tasklist scan blind spot while the
    | 595 | + * task is exiting and may be removed from the tasklist. See
    | 596 | + * corresponding synchronize_srcu() for further details.
    | 597 | + */
580 | 598 | void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
581 | 599 | {
582 |     | -       preempt_disable();
583 | 600 |         current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
584 |     | -       preempt_enable();
585 | 601 | }
586 | 602 |
587 |     | -/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
588 |     | -void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
    | 603 | +/*
    | 604 | + * Contribute to protect against tasklist scan blind spot while the
    | 605 | + * task is exiting and may be removed from the tasklist. See
    | 606 | + * corresponding synchronize_srcu() for further details.
    | 607 | + */
    | 608 | +void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
589 | 609 | {
590 | 610 |         struct task_struct *t = current;
591 | 611 |
592 |     | -       preempt_disable();
593 | 612 |         __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
594 |     | -       preempt_enable();
595 |     | -       exit_tasks_rcu_finish_trace(t);
    | 613 | +}
    | 614 | +
    | 615 | +/*
    | 616 | + * Contribute to protect against tasklist scan blind spot while the
    | 617 | + * task is exiting and may be removed from the tasklist. See
    | 618 | + * corresponding synchronize_srcu() for further details.
    | 619 | + */
    | 620 | +void exit_tasks_rcu_finish(void)
    | 621 | +{
    | 622 | +       exit_tasks_rcu_stop();
    | 623 | +       exit_tasks_rcu_finish_trace(current);
596 | 624 | }
597 | 625 |
598 | 626 | #else /* #ifdef CONFIG_TASKS_RCU */
599 | 627 | static inline void show_rcu_tasks_classic_gp_kthread(void) { }
600 | 628 | void exit_tasks_rcu_start(void) { }
    | 629 | +void exit_tasks_rcu_stop(void) { }
601 | 630 | void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
602 | 631 | #endif /* #else #ifdef CONFIG_TASKS_RCU */
603 | 632 |
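These hooks use the low-level __srcu_read_lock()/__srcu_read_unlock() pair, whose lock side returns an index that must be handed back to the matching unlock; because lock and unlock happen in different functions here, the index is parked in task_struct. A minimal sketch of that contract, with srcu_example and example_idx as placeholder names for tasks_rcu_exit_srcu and current->rcu_tasks_idx:

```c
#include <linux/srcu.h>

DEFINE_SRCU(srcu_example);	/* placeholder srcu_struct */

static int example_idx;		/* placeholder for the stashed index */

static void example_enter(void)
{
	/* The lock side returns the index of the active SRCU epoch... */
	example_idx = __srcu_read_lock(&srcu_example);
}

static void example_exit(void)
{
	/* ...and the unlock side must be given that same index back. */
	__srcu_read_unlock(&srcu_example, example_idx);
}
```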
.. | ..
620 | 649 | // Wait for one rude RCU-tasks grace period.
621 | 650 | static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
622 | 651 | {
623 |     | -       if (num_online_cpus() <= 1)
624 |     | -               return;  // Fastpath for only one CPU.
625 |     | -
626 | 652 |         rtp->n_ipis += cpumask_weight(cpu_online_mask);
627 | 653 |         schedule_on_each_cpu(rcu_tasks_be_rude);
628 | 654 | }
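For context, schedule_on_each_cpu() runs its callback synchronously from a workqueue kthread on every online CPU, so simply returning from it means each CPU has context-switched at least once, which is the quiescent state RCU Tasks Rude relies on; the callback itself can be empty. A hedged sketch (rcu_tasks_be_rude is not shown in this hunk, so its body here is an assumption):

```c
#include <linux/workqueue.h>

/* Sketch: the work function can be a no-op, because the context switch
 * into the per-CPU workqueue kthread is itself the quiescent state. */
static void rcu_tasks_be_rude_sketch(struct work_struct *work)
{
}

static void rude_gp_sketch(void)
{
	/* Returns only after the no-op has run on every online CPU. */
	schedule_on_each_cpu(rcu_tasks_be_rude_sketch);
}
```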
.. | ..
707 | 733 | #endif /* #ifndef CONFIG_TINY_RCU */
708 | 734 |
709 | 735 | #else /* #ifdef CONFIG_TASKS_RUDE_RCU */
710 |     | -static void show_rcu_tasks_rude_gp_kthread(void) {}
    | 736 | +static inline void show_rcu_tasks_rude_gp_kthread(void) {}
711 | 737 | #endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */
712 | 738 |
713 | 739 | ////////////////////////////////////////////////////////////////////////
.. | ..
775 | 801 | /* If we are the last reader, wake up the grace-period kthread. */
776 | 802 | void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
777 | 803 | {
778 |     | -       int nq = t->trc_reader_special.b.need_qs;
    | 804 | +       int nq = READ_ONCE(t->trc_reader_special.b.need_qs);
779 | 805 |
780 | 806 |         if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
781 | 807 |             t->trc_reader_special.b.need_mb)
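This and the later hunks follow one pattern: fields such as trc_reader_nesting and trc_reader_special.b.need_qs can be updated by an IPI handler or a remote inspector while being read elsewhere without a lock, so the loads and stores are marked to keep the compiler from tearing, fusing, or re-loading them. A generic sketch of the pairing, with shared_flag as a hypothetical field for illustration:

```c
#include <linux/compiler.h>	/* READ_ONCE()/WRITE_ONCE() */
#include <linux/types.h>

static int shared_flag;		/* hypothetical field touched by two contexts */

static void writer_side(void)
{
	WRITE_ONCE(shared_flag, 1);	/* e.g., set from an IPI handler */
}

static bool reader_side(void)
{
	/* A plain read could be torn or re-read; the marked access cannot. */
	return READ_ONCE(shared_flag) != 0;
}
```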
.. | ..
815 | 841 |
816 | 842 |         // If the task is no longer running on this CPU, leave.
817 | 843 |         if (unlikely(texp != t)) {
818 |     | -               if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
819 |     | -                       wake_up(&trc_wait);
820 | 844 |                 goto reset_ipi; // Already on holdout list, so will check later.
821 | 845 |         }
822 | 846 |
823 | 847 |         // If the task is not in a read-side critical section, and
824 | 848 |         // if this is the last reader, awaken the grace-period kthread.
825 |     | -       if (likely(!t->trc_reader_nesting)) {
826 |     | -               if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
827 |     | -                       wake_up(&trc_wait);
828 |     | -               // Mark as checked after decrement to avoid false
829 |     | -               // positives on the above WARN_ON_ONCE().
    | 849 | +       if (likely(!READ_ONCE(t->trc_reader_nesting))) {
830 | 850 |                 WRITE_ONCE(t->trc_reader_checked, true);
831 | 851 |                 goto reset_ipi;
832 | 852 |         }
833 | 853 |         // If we are racing with an rcu_read_unlock_trace(), try again later.
834 |     | -       if (unlikely(t->trc_reader_nesting < 0)) {
835 |     | -               if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
836 |     | -                       wake_up(&trc_wait);
    | 854 | +       if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0))
837 | 855 |                 goto reset_ipi;
838 |     | -       }
839 | 856 |         WRITE_ONCE(t->trc_reader_checked, true);
840 | 857 |
841 | 858 |         // Get here if the task is in a read-side critical section. Set
842 | 859 |         // its state so that it will awaken the grace-period kthread upon
843 | 860 |         // exit from that critical section.
844 |     | -       WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
    | 861 | +       atomic_inc(&trc_n_readers_need_end); // One more to wait on.
    | 862 | +       WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
845 | 863 |         WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
846 | 864 |
847 | 865 | reset_ipi:
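The net effect of this hunk is a change in counting discipline: instead of bumping trc_n_readers_need_end before every IPI and decrementing it on each early-exit path, the count is taken only when a task is actually registered as a reader the grace period must wait for, with the matching decrement in rcu_read_unlock_trace_special(). A condensed sketch of the resulting pairing (not a literal excerpt; the deregistration side is not shown in this diff):

```c
/* Registration: done only once a task is known to be inside a read-side
 * critical section (here, from the IPI handler or trc_inspect_reader()). */
static void register_blocking_reader_sketch(struct task_struct *t)
{
	atomic_inc(&trc_n_readers_need_end);		/* one more to wait on */
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
}

/* Deregistration: done when that reader leaves its critical section. */
static void deregister_blocking_reader_sketch(struct task_struct *t)
{
	WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
	if (atomic_dec_and_test(&trc_n_readers_need_end))
		wake_up(&trc_wait);			/* last reader: wake the GP kthread */
}
```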
.. | ..
856 | 874 | static bool trc_inspect_reader(struct task_struct *t, void *arg)
857 | 875 | {
858 | 876 |         int cpu = task_cpu(t);
859 |     | -       bool in_qs = false;
    | 877 | +       int nesting;
860 | 878 |         bool ofl = cpu_is_offline(cpu);
861 | 879 |
862 | 880 |         if (task_curr(t)) {
.. | ..
876 | 894 |                 n_heavy_reader_updates++;
877 | 895 |                 if (ofl)
878 | 896 |                         n_heavy_reader_ofl_updates++;
879 |     | -               in_qs = true;
    | 897 | +               nesting = 0;
880 | 898 |         } else {
881 |     | -               in_qs = likely(!t->trc_reader_nesting);
    | 899 | +               // The task is not running, so C-language access is safe.
    | 900 | +               nesting = t->trc_reader_nesting;
882 | 901 |         }
883 | 902 |
884 |     | -       // Mark as checked so that the grace-period kthread will
885 |     | -       // remove it from the holdout list.
886 |     | -       t->trc_reader_checked = true;
887 |     | -
888 |     | -       if (in_qs)
889 |     | -               return true;  // Already in quiescent state, done!!!
    | 903 | +       // If not exiting a read-side critical section, mark as checked
    | 904 | +       // so that the grace-period kthread will remove it from the
    | 905 | +       // holdout list.
    | 906 | +       t->trc_reader_checked = nesting >= 0;
    | 907 | +       if (nesting <= 0)
    | 908 | +               return !nesting;  // If in QS, done, otherwise try again later.
890 | 909 |
891 | 910 |         // The task is in a read-side critical section, so set up its
892 | 911 |         // state so that it will awaken the grace-period kthread upon exit
893 | 912 |         // from that critical section.
894 | 913 |         atomic_inc(&trc_n_readers_need_end); // One more to wait on.
895 |     | -       WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
    | 914 | +       WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
896 | 915 |         WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
897 | 916 |         return true;
898 | 917 | }
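A compact way to read the new logic: the sampled nesting value alone decides both whether the task can be marked checked and what the function returns (true meaning the grace-period kthread is done with this task for now, false meaning try again later). The mapping, as a comment-only summary:

```c
/*
 * Sketch of the nesting -> outcome mapping in trc_inspect_reader():
 *
 *   nesting == 0:  checked = true,  return true   (already in a quiescent state)
 *   nesting  < 0:  checked = false, return false  (racing with unlock, retry later)
 *   nesting  > 0:  checked = true,  register as a blocking reader, return true
 */
```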
.. | ..
910 | 929 |         // The current task had better be in a quiescent state.
911 | 930 |         if (t == current) {
912 | 931 |                 t->trc_reader_checked = true;
913 |     | -               WARN_ON_ONCE(t->trc_reader_nesting);
    | 932 | +               WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
914 | 933 |                 return;
915 | 934 |         }
916 | 935 |
.. | ..
933 | 952 |                 if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
934 | 953 |                         return;
935 | 954 |
936 |     | -               atomic_inc(&trc_n_readers_need_end);
937 | 955 |                 per_cpu(trc_ipi_to_cpu, cpu) = true;
938 | 956 |                 t->trc_ipi_to_cpu = cpu;
939 | 957 |                 rcu_tasks_trace.n_ipis++;
940 |     | -               if (smp_call_function_single(cpu,
941 |     | -                                            trc_read_check_handler, t, 0)) {
    | 958 | +               if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
942 | 959 |                         // Just in case there is some other reason for
943 | 960 |                         // failure than the target CPU being offline.
    | 961 | +                       WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n",
    | 962 | +                                 __func__, cpu);
944 | 963 |                         rcu_tasks_trace.n_ipis_fails++;
945 | 964 |                         per_cpu(trc_ipi_to_cpu, cpu) = false;
946 |     | -                       t->trc_ipi_to_cpu = cpu;
947 |     | -                       if (atomic_dec_and_test(&trc_n_readers_need_end)) {
948 |     | -                               WARN_ON_ONCE(1);
949 |     | -                               wake_up(&trc_wait);
950 |     | -                       }
    | 965 | +                       t->trc_ipi_to_cpu = -1;
951 | 966 |                 }
952 | 967 |         }
953 | 968 | }
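Besides dropping the counter juggling, this hunk corrects the failure path's bookkeeping: t->trc_ipi_to_cpu acts as an "IPI in flight to this CPU" marker (>= 0) and must return to -1 when the send fails, rather than being rewritten with the CPU number as before. A sketch of the convention, with send_check_ipi_sketch() as a hypothetical wrapper:

```c
/* Sketch of the in-flight-IPI convention (illustration only). */
static void send_check_ipi_sketch(struct task_struct *t, int cpu)
{
	t->trc_ipi_to_cpu = cpu;			/* IPI now considered in flight */
	if (smp_call_function_single(cpu, trc_read_check_handler, t, 0))
		t->trc_ipi_to_cpu = -1;			/* send failed: nothing in flight */
}
```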
.. | ..
1020 | 1035 |                  ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
1021 | 1036 |                  ".i"[is_idle_task(t)],
1022 | 1037 |                  ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
1023 |      | -                t->trc_reader_nesting,
1024 |      | -                " N"[!!t->trc_reader_special.b.need_qs],
     | 1038 | +                READ_ONCE(t->trc_reader_nesting),
     | 1039 | +                " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
1025 | 1040 |                  cpu);
1026 | 1041 |         sched_show_task(t);
1027 | 1042 | }
.. | ..
1068 | 1083 |         }
1069 | 1084 | }
1070 | 1085 |
     | 1086 | +static void rcu_tasks_trace_empty_fn(void *unused)
     | 1087 | +{
     | 1088 | +}
     | 1089 | +
1071 | 1090 | /* Wait for grace period to complete and provide ordering. */
1072 | 1091 | static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
1073 | 1092 | {
     | 1093 | +       int cpu;
1074 | 1094 |         bool firstreport;
1075 | 1095 |         struct task_struct *g, *t;
1076 | 1096 |         LIST_HEAD(holdouts);
1077 | 1097 |         long ret;
     | 1098 | +
     | 1099 | +       // Wait for any lingering IPI handlers to complete. Note that
     | 1100 | +       // if a CPU has gone offline or transitioned to userspace in the
     | 1101 | +       // meantime, all IPI handlers should have been drained beforehand.
     | 1102 | +       // Yes, this assumes that CPUs process IPIs in order. If that ever
     | 1103 | +       // changes, there will need to be a recheck and/or timed wait.
     | 1104 | +       for_each_online_cpu(cpu)
     | 1105 | +               if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
     | 1106 | +                       smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
1078 | 1107 |
1079 | 1108 |         // Remove the safety count.
1080 | 1109 |         smp_mb__before_atomic(); // Order vs. earlier atomics
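The drain works because the last argument of smp_call_function_single() makes the call synchronous: it does not return until the no-op handler has run on the target CPU, and, under the comment's in-order IPI assumption, any earlier trc_read_check_handler() IPI queued to that CPU must therefore have completed first. The key call, annotated (an illustration of the parameters, not new code):

```c
/*
 * wait == 1: block until rcu_tasks_trace_empty_fn() has executed on @cpu.
 * Combined with in-order IPI processing, returning from this call implies
 * that any previously sent trc_read_check_handler() IPI to @cpu is done.
 */
smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL /* arg */, 1 /* wait */);
```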
.. | ..
1115 | 1144 | static void exit_tasks_rcu_finish_trace(struct task_struct *t)
1116 | 1145 | {
1117 | 1146 |         WRITE_ONCE(t->trc_reader_checked, true);
1118 |      | -       WARN_ON_ONCE(t->trc_reader_nesting);
     | 1147 | +       WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
1119 | 1148 |         WRITE_ONCE(t->trc_reader_nesting, 0);
1120 | 1149 |         if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
1121 | 1150 |                 rcu_read_unlock_trace_special(t, 0);