| .. | .. |
|---|
| 171 | 171 | static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) |
|---|
| 172 | 172 | { |
|---|
| 173 | 173 | /* Complain if the scheduler has not started. */ |
|---|
| 174 | | - WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, |
|---|
| 175 | | - "synchronize_rcu_tasks called too soon"); |
|---|
| 174 | + if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, |
|---|
| 175 | + "synchronize_%s() called too soon", rtp->name)) |
|---|
| 176 | + return; |
|---|
| 176 | 177 | |
|---|
| 177 | 178 | /* Wait for the grace period. */ |
|---|
| 178 | 179 | wait_rcu_gp(rtp->call_func); |
|---|
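
The hunk above turns the unconditional WARN_ONCE() into a guarded early return and folds the flavor name into the message. For reference, here is a sketch of how the function reads with the change applied (reassembled from the rows above, not a verbatim excerpt of the file):

```c
/* Reassembled from the hunk above; illustrative, not a verbatim file excerpt. */
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
	/* Complain if the scheduler has not started. */
	if (WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
		      "synchronize_%s() called too soon", rtp->name))
		return;

	/* Wait for the grace period. */
	wait_rcu_gp(rtp->call_func);
}
```

WARN_ONCE() evaluates to its condition, so the early return fires only in the called-too-soon case, and rtp->name makes the splat identify which synchronize_*() flavor was invoked prematurely.
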
| .. | .. |
|---|
| 416 | 417 | static void rcu_tasks_postscan(struct list_head *hop) |
|---|
| 417 | 418 | { |
|---|
| 418 | 419 | /* |
|---|
| 419 | | - * Wait for tasks that are in the process of exiting. This |
|---|
| 420 | | - * does only part of the job, ensuring that all tasks that were |
|---|
| 421 | | - * previously exiting reach the point where they have disabled |
|---|
| 422 | | - * preemption, allowing the later synchronize_rcu() to finish |
|---|
| 423 | | - * the job. |
|---|
| 420 | + * Exiting tasks may escape the tasklist scan. Those are vulnerable |
|---|
| 421 | + * until their final schedule() with TASK_DEAD state. To cope with |
|---|
| 422 | + * this, divide the fragile part of the exit path into two intersecting |
|---|
| 423 | + * read side critical sections: |
|---|
| 424 | + * |
|---|
| 425 | + * 1) An _SRCU_ read side starting before calling exit_notify(), |
|---|
| 426 | + * which may remove the task from the tasklist, and ending after |
|---|
| 427 | + * the final preempt_disable() call in do_exit(). |
|---|
| 428 | + * |
|---|
| 429 | + * 2) An _RCU_ read side starting with the final preempt_disable() |
|---|
| 430 | + * call in do_exit() and ending with the final call to schedule() |
|---|
| 431 | + * with TASK_DEAD state. |
|---|
| 432 | + * |
|---|
| 433 | + * This handles part 1). Postgp will then handle part 2) with a |
|---|
| 434 | + * call to synchronize_rcu(). |
|---|
| 424 | 435 | */ |
|---|
| 425 | 436 | synchronize_srcu(&tasks_rcu_exit_srcu); |
|---|
| 426 | 437 | } |
|---|
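
The new comment describes two overlapping read-side critical sections that keep an exiting task covered from just before it can drop off the tasklist until its final schedule(). The sketch below restates that coverage as code; the stand-in function and the exact ordering of the calls are illustrative only, since the real call sites live in kernel/exit.c and are not part of this hunk:

```c
/*
 * Illustrative restatement of the comment above; exit_path_sketch() is a
 * hypothetical stand-in for do_exit(), not actual kernel code.
 */
static void exit_path_sketch(void)
{
	exit_tasks_rcu_start();		/* (1) SRCU read side begins. */

	/*
	 * exit_notify() may remove the task from the tasklist here, so a
	 * tasklist scan can miss it; rcu_tasks_postscan()'s
	 * synchronize_srcu(&tasks_rcu_exit_srcu) still waits for it.
	 */

	preempt_disable();		/* (2) RCU read side begins.  */
	exit_tasks_rcu_finish();	/* (1) SRCU read side ends.   */

	/*
	 * From here to the final schedule() with TASK_DEAD state, the task
	 * is covered by the synchronize_rcu() in rcu_tasks_postgp().
	 */
	do_task_dead();			/* Sets TASK_DEAD and schedules away. */
}
```
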
| .. | .. |
|---|
| 487 | 498 | * |
|---|
| 488 | 499 | * In addition, this synchronize_rcu() waits for exiting tasks |
|---|
| 489 | 500 | * to complete their final preempt_disable() region of execution, |
|---|
| 490 | | - * cleaning up after the synchronize_srcu() above. |
|---|
| 501 | + * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu), |
|---|
| 502 | + * enforcing the whole region before tasklist removal until |
|---|
| 503 | + * the final schedule() with TASK_DEAD state to be an RCU TASKS |
|---|
| 504 | + * read side critical section. |
|---|
| 491 | 505 | */ |
|---|
| 492 | 506 | synchronize_rcu(); |
|---|
| 493 | 507 | } |
|---|
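
The expanded comment relies on the fact that, in current kernels, a preempt_disable()/preempt_enable() region behaves as an RCU read-side critical section, so this synchronize_rcu() also waits out the exiting task's final preempt-disabled stretch. A minimal, generic illustration of that property follows; the function names and the shared variable are placeholders, unrelated to the RCU Tasks internals above:

```c
/* Minimal sketch: synchronize_rcu() waits for preempt-disabled regions. */
#include <linux/preempt.h>
#include <linux/rcupdate.h>

static int shared_state;		/* hypothetical shared datum */

static void reader_side(void)
{
	preempt_disable();		/* behaves as an RCU read-side section */
	(void)READ_ONCE(shared_state);	/* may still observe the old value     */
	preempt_enable();
}

static void updater_side(void)
{
	WRITE_ONCE(shared_state, 1);
	synchronize_rcu();		/* returns only after every CPU has left
					 * any such preempt-disabled region    */
}
```
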
| .. | .. |
|---|
| 576 | 590 | } |
|---|
| 577 | 591 | #endif /* #ifndef CONFIG_TINY_RCU */ |
|---|
| 578 | 592 | |
|---|
| 579 | | -/* Do the srcu_read_lock() for the above synchronize_srcu(). */ |
|---|
| 593 | +/* |
|---|
| 594 | + * Contribute to protect against tasklist scan blind spot while the |
|---|
| 595 | + * task is exiting and may be removed from the tasklist. See |
|---|
| 596 | + * corresponding synchronize_srcu() for further details. |
|---|
| 597 | + */ |
|---|
| 580 | 598 | void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu) |
|---|
| 581 | 599 | { |
|---|
| 582 | | - preempt_disable(); |
|---|
| 583 | 600 | current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); |
|---|
| 584 | | - preempt_enable(); |
|---|
| 585 | 601 | } |
|---|
| 586 | 602 | |
|---|
| 587 | | -/* Do the srcu_read_unlock() for the above synchronize_srcu(). */ |
|---|
| 588 | | -void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) |
|---|
| 603 | +/* |
|---|
| 604 | + * Contribute to protect against tasklist scan blind spot while the |
|---|
| 605 | + * task is exiting and may be removed from the tasklist. See |
|---|
| 606 | + * corresponding synchronize_srcu() for further details. |
|---|
| 607 | + */ |
|---|
| 608 | +void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu) |
|---|
| 589 | 609 | { |
|---|
| 590 | 610 | struct task_struct *t = current; |
|---|
| 591 | 611 | |
|---|
| 592 | | - preempt_disable(); |
|---|
| 593 | 612 | __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx); |
|---|
| 594 | | - preempt_enable(); |
|---|
| 595 | | - exit_tasks_rcu_finish_trace(t); |
|---|
| 613 | +} |
|---|
| 614 | + |
|---|
| 615 | +/* |
|---|
| 616 | + * Contribute to protect against tasklist scan blind spot while the |
|---|
| 617 | + * task is exiting and may be removed from the tasklist. See |
|---|
| 618 | + * corresponding synchronize_srcu() for further details. |
|---|
| 619 | + */ |
|---|
| 620 | +void exit_tasks_rcu_finish(void) |
|---|
| 621 | +{ |
|---|
| 622 | + exit_tasks_rcu_stop(); |
|---|
| 623 | + exit_tasks_rcu_finish_trace(current); |
|---|
| 596 | 624 | } |
|---|
| 597 | 625 | |
|---|
| 598 | 626 | #else /* #ifdef CONFIG_TASKS_RCU */ |
|---|
| 599 | 627 | static inline void show_rcu_tasks_classic_gp_kthread(void) { } |
|---|
| 600 | 628 | void exit_tasks_rcu_start(void) { } |
|---|
| 629 | +void exit_tasks_rcu_stop(void) { } |
|---|
| 601 | 630 | void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } |
|---|
| 602 | 631 | #endif /* #else #ifdef CONFIG_TASKS_RCU */ |
|---|
| 603 | 632 | |
|---|
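
After this hunk, the old exit_tasks_rcu_finish() is split in two: exit_tasks_rcu_stop() drops only the SRCU reader, while exit_tasks_rcu_finish() layers the Tasks Trace cleanup on top of it. The explicit preempt_disable()/preempt_enable() bracketing disappears, presumably because __srcu_read_lock()/__srcu_read_unlock() are already safe to call with preemption enabled. Reassembled result for the CONFIG_TASKS_RCU=y case (a sketch, not a verbatim excerpt):

```c
/* Sketch reassembled from the hunk above (CONFIG_TASKS_RCU=y case). */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
}

void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu)
{
	struct task_struct *t = current;

	__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx);
}

void exit_tasks_rcu_finish(void)
{
	exit_tasks_rcu_stop();
	exit_tasks_rcu_finish_trace(current);
}
```

The !CONFIG_TASKS_RCU stubs gain a matching empty exit_tasks_rcu_stop(), so callers need not be wrapped in #ifdefs.
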
| .. | .. |
|---|
| 620 | 649 | // Wait for one rude RCU-tasks grace period. |
|---|
| 621 | 650 | static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp) |
|---|
| 622 | 651 | { |
|---|
| 623 | | - if (num_online_cpus() <= 1) |
|---|
| 624 | | - return; // Fastpath for only one CPU. |
|---|
| 625 | | - |
|---|
| 626 | 652 | rtp->n_ipis += cpumask_weight(cpu_online_mask); |
|---|
| 627 | 653 | schedule_on_each_cpu(rcu_tasks_be_rude); |
|---|
| 628 | 654 | } |
|---|
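
With the single-CPU fastpath removed, every rude grace period goes through schedule_on_each_cpu(), which queues a work item on each online CPU and waits for it; running that work forces a context switch on each CPU, which is exactly the quiescent state RCU Tasks Rude needs, including on a system with only one CPU online. The resulting function, reassembled from the hunk:

```c
/* Sketch reassembled from the hunk above. */
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
	rtp->n_ipis += cpumask_weight(cpu_online_mask);
	schedule_on_each_cpu(rcu_tasks_be_rude);
}
```
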
| .. | .. |
|---|
| 707 | 733 | #endif /* #ifndef CONFIG_TINY_RCU */ |
|---|
| 708 | 734 | |
|---|
| 709 | 735 | #else /* #ifdef CONFIG_TASKS_RUDE_RCU */ |
|---|
| 710 | | -static void show_rcu_tasks_rude_gp_kthread(void) {} |
|---|
| 736 | +static inline void show_rcu_tasks_rude_gp_kthread(void) {} |
|---|
| 711 | 737 | #endif /* #else #ifdef CONFIG_TASKS_RUDE_RCU */ |
|---|
| 712 | 738 | |
|---|
| 713 | 739 | //////////////////////////////////////////////////////////////////////// |
|---|
| .. | .. |
|---|
| 775 | 801 | /* If we are the last reader, wake up the grace-period kthread. */ |
|---|
| 776 | 802 | void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) |
|---|
| 777 | 803 | { |
|---|
| 778 | | - int nq = t->trc_reader_special.b.need_qs; |
|---|
| 804 | + int nq = READ_ONCE(t->trc_reader_special.b.need_qs); |
|---|
| 779 | 805 | |
|---|
| 780 | 806 | if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && |
|---|
| 781 | 807 | t->trc_reader_special.b.need_mb) |
|---|
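
The .need_qs flag is written with WRITE_ONCE() from trc_read_check_handler(), which runs as an smp_call_function_single() IPI handler (see the later hunks), so the plain C-language read here becomes READ_ONCE() to pair with those marked stores. A generic sketch of the pattern; the flag and function names below are placeholders, not the RCU Tasks Trace fields:

```c
/* Generic READ_ONCE()/WRITE_ONCE() pairing sketch; not RCU Tasks code. */
#include <linux/compiler.h>

static int example_flag;		/* hypothetical flag shared with an IPI handler */

static void example_ipi_handler(void *unused)
{
	WRITE_ONCE(example_flag, 1);	/* marked store from IPI context */
}

static int example_task_side(void)
{
	/* Marked load: read exactly once, no tearing or compiler refetch. */
	return READ_ONCE(example_flag);
}
```
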
| .. | .. |
|---|
| 815 | 841 | |
|---|
| 816 | 842 | // If the task is no longer running on this CPU, leave. |
|---|
| 817 | 843 | if (unlikely(texp != t)) { |
|---|
| 818 | | - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) |
|---|
| 819 | | - wake_up(&trc_wait); |
|---|
| 820 | 844 | goto reset_ipi; // Already on holdout list, so will check later. |
|---|
| 821 | 845 | } |
|---|
| 822 | 846 | |
|---|
| 823 | 847 | // If the task is not in a read-side critical section, and |
|---|
| 824 | 848 | // if this is the last reader, awaken the grace-period kthread. |
|---|
| 825 | | - if (likely(!t->trc_reader_nesting)) { |
|---|
| 826 | | - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) |
|---|
| 827 | | - wake_up(&trc_wait); |
|---|
| 828 | | - // Mark as checked after decrement to avoid false |
|---|
| 829 | | - // positives on the above WARN_ON_ONCE(). |
|---|
| 849 | + if (likely(!READ_ONCE(t->trc_reader_nesting))) { |
|---|
| 830 | 850 | WRITE_ONCE(t->trc_reader_checked, true); |
|---|
| 831 | 851 | goto reset_ipi; |
|---|
| 832 | 852 | } |
|---|
| 833 | 853 | // If we are racing with an rcu_read_unlock_trace(), try again later. |
|---|
| 834 | | - if (unlikely(t->trc_reader_nesting < 0)) { |
|---|
| 835 | | - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) |
|---|
| 836 | | - wake_up(&trc_wait); |
|---|
| 854 | + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) |
|---|
| 837 | 855 | goto reset_ipi; |
|---|
| 838 | | - } |
|---|
| 839 | 856 | WRITE_ONCE(t->trc_reader_checked, true); |
|---|
| 840 | 857 | |
|---|
| 841 | 858 | // Get here if the task is in a read-side critical section. Set |
|---|
| 842 | 859 | // its state so that it will awaken the grace-period kthread upon |
|---|
| 843 | 860 | // exit from that critical section. |
|---|
| 844 | | - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); |
|---|
| 861 | + atomic_inc(&trc_n_readers_need_end); // One more to wait on. |
|---|
| 862 | + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); |
|---|
| 845 | 863 | WRITE_ONCE(t->trc_reader_special.b.need_qs, true); |
|---|
| 846 | 864 | |
|---|
| 847 | 865 | reset_ipi: |
|---|
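
The handler no longer decrements trc_n_readers_need_end on each early-exit path; instead, the atomic_inc() now sits at the single point where the handler actually commits to waiting on a running reader, right before setting .need_qs. The underlying scheme is a waiter-held safety count plus one count per reader caught in a critical section, with the last decrementer doing the wakeup; a generic, self-contained analogue is sketched below (all names are placeholders, not the Tasks Trace implementation):

```c
/* Generic analogue of the trc_n_readers_need_end counting scheme. */
#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t pending = ATOMIC_INIT(1);	/* 1 = waiter's safety count */
static DECLARE_WAIT_QUEUE_HEAD(pending_wq);

static void add_reader_to_wait_on(void)		/* reader found in a critical section */
{
	atomic_inc(&pending);
}

static void reader_done(void)			/* reader leaves its critical section */
{
	if (atomic_dec_and_test(&pending))
		wake_up(&pending_wq);
}

static void wait_for_readers(void)		/* grace-period kthread side */
{
	reader_done();				/* drop the safety count */
	wait_event(pending_wq, !atomic_read(&pending));
}
```
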
| .. | .. |
|---|
| 856 | 874 | static bool trc_inspect_reader(struct task_struct *t, void *arg) |
|---|
| 857 | 875 | { |
|---|
| 858 | 876 | int cpu = task_cpu(t); |
|---|
| 859 | | - bool in_qs = false; |
|---|
| 877 | + int nesting; |
|---|
| 860 | 878 | bool ofl = cpu_is_offline(cpu); |
|---|
| 861 | 879 | |
|---|
| 862 | 880 | if (task_curr(t)) { |
|---|
| .. | .. |
|---|
| 876 | 894 | n_heavy_reader_updates++; |
|---|
| 877 | 895 | if (ofl) |
|---|
| 878 | 896 | n_heavy_reader_ofl_updates++; |
|---|
| 879 | | - in_qs = true; |
|---|
| 897 | + nesting = 0; |
|---|
| 880 | 898 | } else { |
|---|
| 881 | | - in_qs = likely(!t->trc_reader_nesting); |
|---|
| 899 | + // The task is not running, so C-language access is safe. |
|---|
| 900 | + nesting = t->trc_reader_nesting; |
|---|
| 882 | 901 | } |
|---|
| 883 | 902 | |
|---|
| 884 | | - // Mark as checked so that the grace-period kthread will |
|---|
| 885 | | - // remove it from the holdout list. |
|---|
| 886 | | - t->trc_reader_checked = true; |
|---|
| 887 | | - |
|---|
| 888 | | - if (in_qs) |
|---|
| 889 | | - return true; // Already in quiescent state, done!!! |
|---|
| 903 | + // If not exiting a read-side critical section, mark as checked |
|---|
| 904 | + // so that the grace-period kthread will remove it from the |
|---|
| 905 | + // holdout list. |
|---|
| 906 | + t->trc_reader_checked = nesting >= 0; |
|---|
| 907 | + if (nesting <= 0) |
|---|
| 908 | + return !nesting; // If in QS, done, otherwise try again later. |
|---|
| 890 | 909 | |
|---|
| 891 | 910 | // The task is in a read-side critical section, so set up its |
|---|
| 892 | 911 | // state so that it will awaken the grace-period kthread upon exit |
|---|
| 893 | 912 | // from that critical section. |
|---|
| 894 | 913 | atomic_inc(&trc_n_readers_need_end); // One more to wait on. |
|---|
| 895 | | - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); |
|---|
| 914 | + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); |
|---|
| 896 | 915 | WRITE_ONCE(t->trc_reader_special.b.need_qs, true); |
|---|
| 897 | 916 | return true; |
|---|
| 898 | 917 | } |
|---|
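
The rewritten trc_inspect_reader() folds the old in_qs boolean into the sign of trc_reader_nesting: zero means the task is already quiescent, negative means an rcu_read_unlock_trace() is in flight (so the caller must retry later and the task is deliberately left unchecked), and positive means the task is inside a read-side critical section and gets counted via trc_n_readers_need_end. A small restatement of that three-way convention; the enum and helper are illustrative, not kernel code:

```c
/* Illustrative restatement of the trc_reader_nesting convention above. */
enum trc_reader_state {
	TRC_READER_QS,		/* nesting == 0: already in a quiescent state        */
	TRC_READER_UNLOCKING,	/* nesting  < 0: rcu_read_unlock_trace() in flight   */
	TRC_READER_ACTIVE,	/* nesting  > 0: inside a read-side critical section */
};

static enum trc_reader_state classify_nesting(int nesting)
{
	if (!nesting)
		return TRC_READER_QS;
	return nesting < 0 ? TRC_READER_UNLOCKING : TRC_READER_ACTIVE;
}
```

In those terms, the function marks the task checked for QS and ACTIVE, returns true (done) for both of them, and returns false only for UNLOCKING so that the holdout is inspected again later.
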
| .. | .. |
|---|
| 910 | 929 | // The current task had better be in a quiescent state. |
|---|
| 911 | 930 | if (t == current) { |
|---|
| 912 | 931 | t->trc_reader_checked = true; |
|---|
| 913 | | - WARN_ON_ONCE(t->trc_reader_nesting); |
|---|
| 932 | + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); |
|---|
| 914 | 933 | return; |
|---|
| 915 | 934 | } |
|---|
| 916 | 935 | |
|---|
| .. | .. |
|---|
| 933 | 952 | if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) |
|---|
| 934 | 953 | return; |
|---|
| 935 | 954 | |
|---|
| 936 | | - atomic_inc(&trc_n_readers_need_end); |
|---|
| 937 | 955 | per_cpu(trc_ipi_to_cpu, cpu) = true; |
|---|
| 938 | 956 | t->trc_ipi_to_cpu = cpu; |
|---|
| 939 | 957 | rcu_tasks_trace.n_ipis++; |
|---|
| 940 | | - if (smp_call_function_single(cpu, |
|---|
| 941 | | - trc_read_check_handler, t, 0)) { |
|---|
| 958 | + if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { |
|---|
| 942 | 959 | // Just in case there is some other reason for |
|---|
| 943 | 960 | // failure than the target CPU being offline. |
|---|
| 961 | + WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n", |
|---|
| 962 | + __func__, cpu); |
|---|
| 944 | 963 | rcu_tasks_trace.n_ipis_fails++; |
|---|
| 945 | 964 | per_cpu(trc_ipi_to_cpu, cpu) = false; |
|---|
| 946 | | - t->trc_ipi_to_cpu = cpu; |
|---|
| 947 | | - if (atomic_dec_and_test(&trc_n_readers_need_end)) { |
|---|
| 948 | | - WARN_ON_ONCE(1); |
|---|
| 949 | | - wake_up(&trc_wait); |
|---|
| 950 | | - } |
|---|
| 965 | + t->trc_ipi_to_cpu = -1; |
|---|
| 951 | 966 | } |
|---|
| 952 | 967 | } |
|---|
| 953 | 968 | } |
|---|
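
When smp_call_function_single() fails, the new code warns once (the in-code comment notes that the expected reason is the target CPU having gone offline), bumps the failure counter, and, crucially, resets t->trc_ipi_to_cpu to -1; the removed line re-wrote the CPU number, which left the task looking as though an IPI were still in flight, since the earlier "t->trc_ipi_to_cpu >= 0" check treats any non-negative value that way. The send path as it reads after this hunk:

```c
	/* Sketch of the IPI-send path as it reads after this hunk. */
	per_cpu(trc_ipi_to_cpu, cpu) = true;
	t->trc_ipi_to_cpu = cpu;
	rcu_tasks_trace.n_ipis++;
	if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
		// Just in case there is some other reason for
		// failure than the target CPU being offline.
		WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n",
			  __func__, cpu);
		rcu_tasks_trace.n_ipis_fails++;
		per_cpu(trc_ipi_to_cpu, cpu) = false;
		t->trc_ipi_to_cpu = -1;
	}
```
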
| .. | .. |
|---|
| 1020 | 1035 | ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0], |
|---|
| 1021 | 1036 | ".i"[is_idle_task(t)], |
|---|
| 1022 | 1037 | ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], |
|---|
| 1023 | | - t->trc_reader_nesting, |
|---|
| 1024 | | - " N"[!!t->trc_reader_special.b.need_qs], |
|---|
| 1038 | + READ_ONCE(t->trc_reader_nesting), |
|---|
| 1039 | + " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)], |
|---|
| 1025 | 1040 | cpu); |
|---|
| 1026 | 1041 | sched_show_task(t); |
|---|
| 1027 | 1042 | } |
|---|
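
Two of the fields printed here can be updated concurrently from the IPI handler, hence the switch to READ_ONCE(). The surrounding format arguments also use the compact "xY"[flag] idiom: indexing a two-character string literal with a 0-or-1 value yields '.' or ' ' when the flag is clear and a distinguishing letter when it is set. A standalone illustration of that idiom (the helper below is hypothetical):

```c
/* Standalone illustration of the "xY"[flag] idiom used in the stall dump. */
#include <linux/printk.h>
#include <linux/types.h>

static void show_flags_example(bool idle, bool need_qs)
{
	/* Prints ".N" for idle == false, need_qs == true. */
	pr_info("%c%c\n", ".i"[!!idle], " N"[!!need_qs]);
}
```
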
| .. | .. |
|---|
| 1068 | 1083 | } |
|---|
| 1069 | 1084 | } |
|---|
| 1070 | 1085 | |
|---|
| 1086 | +static void rcu_tasks_trace_empty_fn(void *unused) |
|---|
| 1087 | +{ |
|---|
| 1088 | +} |
|---|
| 1089 | + |
|---|
| 1071 | 1090 | /* Wait for grace period to complete and provide ordering. */ |
|---|
| 1072 | 1091 | static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) |
|---|
| 1073 | 1092 | { |
|---|
| 1093 | + int cpu; |
|---|
| 1074 | 1094 | bool firstreport; |
|---|
| 1075 | 1095 | struct task_struct *g, *t; |
|---|
| 1076 | 1096 | LIST_HEAD(holdouts); |
|---|
| 1077 | 1097 | long ret; |
|---|
| 1098 | + |
|---|
| 1099 | + // Wait for any lingering IPI handlers to complete. Note that |
|---|
| 1100 | + // if a CPU has gone offline or transitioned to userspace in the |
|---|
| 1101 | + // meantime, all IPI handlers should have been drained beforehand. |
|---|
| 1102 | + // Yes, this assumes that CPUs process IPIs in order. If that ever |
|---|
| 1103 | + // changes, there will need to be a recheck and/or timed wait. |
|---|
| 1104 | + for_each_online_cpu(cpu) |
|---|
| 1105 | + if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))) |
|---|
| 1106 | + smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1); |
|---|
| 1078 | 1107 | |
|---|
| 1079 | 1108 | // Remove the safety count. |
|---|
| 1080 | 1109 | smp_mb__before_atomic(); // Order vs. earlier atomics |
|---|
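
The new pre-step closes the window in which a trc_read_check_handler() IPI might still be running: for any CPU whose trc_ipi_to_cpu flag is still observed set, a synchronous (wait == 1) empty IPI is sent, and because a given CPU handles its IPIs in order (as the comment notes), the call's return guarantees that the earlier handler has finished before the safety count is removed below. Reassembled from the hunk:

```c
	/*
	 * Sketch of the drain step added to rcu_tasks_trace_postgp();
	 * rcu_tasks_trace_empty_fn() is the empty handler defined in the
	 * hunk above.
	 */
	for_each_online_cpu(cpu)
		if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
			smp_call_function_single(cpu, rcu_tasks_trace_empty_fn,
						 NULL, 1);
```
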
| .. | .. |
|---|
| 1115 | 1144 | static void exit_tasks_rcu_finish_trace(struct task_struct *t) |
|---|
| 1116 | 1145 | { |
|---|
| 1117 | 1146 | WRITE_ONCE(t->trc_reader_checked, true); |
|---|
| 1118 | | - WARN_ON_ONCE(t->trc_reader_nesting); |
|---|
| 1147 | + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); |
|---|
| 1119 | 1148 | WRITE_ONCE(t->trc_reader_nesting, 0); |
|---|
| 1120 | 1149 | if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs))) |
|---|
| 1121 | 1150 | rcu_read_unlock_trace_special(t, 0); |
|---|