```diff
@@ -8,46 +8,23 @@
 #include <linux/cpuhotplug.h>
 #include <linux/kasan.h>
 #include <linux/mm.h>
-#include <linux/mmzone.h>
 #include <linux/scs.h>
-#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/vmstat.h>
-#include <asm/scs.h>
 
-static inline void *__scs_base(struct task_struct *tsk)
+static void __scs_account(void *s, int account)
 {
-        /*
-         * To minimize the risk of exposure, architectures may clear a
-         * task's thread_info::shadow_call_stack while that task is
-         * running, and only save/restore the active shadow call stack
-         * pointer when the usual register may be clobbered (e.g. across
-         * context switches).
-         *
-         * The shadow call stack is aligned to SCS_SIZE, and grows
-         * upwards, so we can mask out the low bits to extract the base
-         * when the task is not running.
-         */
-        return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
-}
+        struct page *scs_page = vmalloc_to_page(s);
 
-static inline unsigned long *scs_magic(void *s)
-{
-        return (unsigned long *)(s + SCS_SIZE) - 1;
+        mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
+                            account * (SCS_SIZE / SZ_1K));
 }
-
-static inline void scs_set_magic(void *s)
-{
-        *scs_magic(s) = SCS_END_MAGIC;
-}
-
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
 
 /* Matches NR_CACHED_STACKS for VMAP_STACK */
 #define NR_CACHED_SCS 2
 static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
 
-static void *scs_alloc(int node)
+static void *__scs_alloc(int node)
 {
         int i;
         void *s;
```
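The removed `__scs_base()`, `scs_magic()` and `scs_set_magic()` helpers are superseded by accessors in the shared header: the shadow call stack base and current pointer are now kept in `thread_info` directly, so there is no masking trick to undo. A sketch of what this hunk relies on, reconstructed from the deleted `scs_magic()` above and from the way `task_scs()` and `task_scs_sp()` are assigned later in the patch (an assumption about `<linux/scs.h>`, not a copy of it):

```c
/* Presumed <linux/scs.h> helpers; the thread_info field names are
 * assumptions reconstructed from how the patch uses them. */
#define task_scs(tsk)           (task_thread_info(tsk)->scs_base)
#define task_scs_sp(tsk)        (task_thread_info(tsk)->scs_sp)

static inline unsigned long *__scs_magic(void *s)
{
        /* The last slot of the SCS_SIZE region holds SCS_END_MAGIC. */
        return (unsigned long *)(s + SCS_SIZE) - 1;
}
```

The new `__scs_account()` also changes how shadow stacks are reported to vmstat: `vmalloc_to_page()` resolves the backing page, `page_pgdat()` selects the owning NUMA node, and the counter moves by `SCS_SIZE / SZ_1K` kilobytes per stack, with `account` supplying the sign (+1 on alloc, -1 on free). This replaces the zone-granular, byte-based `NR_KERNEL_SCS_BYTES` accounting that is deleted further down.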
```diff
@@ -55,44 +32,54 @@
         for (i = 0; i < NR_CACHED_SCS; i++) {
                 s = this_cpu_xchg(scs_cache[i], NULL);
                 if (s) {
+                        kasan_unpoison_vmalloc(s, SCS_SIZE);
                         memset(s, 0, SCS_SIZE);
-                        goto out;
+                        return s;
                 }
         }
 
+        return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
+                                    GFP_SCS, PAGE_KERNEL, 0, node,
+                                    __builtin_return_address(0));
+}
+
+void *scs_alloc(int node)
+{
+        void *s;
+
+        s = __scs_alloc(node);
+        if (!s)
+                return NULL;
+
+        *__scs_magic(s) = SCS_END_MAGIC;
+
         /*
-         * We allocate a full page for the shadow stack, which should be
-         * more than we need. Check the assumption nevertheless.
+         * Poison the allocation to catch unintentional accesses to
+         * the shadow stack when KASAN is enabled.
         */
-        BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
-
-        s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
-                                 VMALLOC_START, VMALLOC_END,
-                                 GFP_SCS, PAGE_KERNEL, 0,
-                                 node, __builtin_return_address(0));
-
-out:
-        if (s)
-                scs_set_magic(s);
-        /* TODO: poison for KASAN, unpoison in scs_free */
-
+        kasan_poison_vmalloc(s, SCS_SIZE);
+        __scs_account(s, 1);
         return s;
 }
 
-static void scs_free(void *s)
+void scs_free(void *s)
 {
         int i;
+
+        __scs_account(s, -1);
+
+        /*
+         * We cannot sleep as this can be called in interrupt context,
+         * so use this_cpu_cmpxchg to update the cache, and vfree_atomic
+         * to free the stack.
+         */
 
         for (i = 0; i < NR_CACHED_SCS; i++)
                 if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
                         return;
 
+        kasan_unpoison_vmalloc(s, SCS_SIZE);
         vfree_atomic(s);
-}
-
-static struct page *__scs_page(struct task_struct *tsk)
-{
-        return vmalloc_to_page(__scs_base(tsk));
 }
 
 static int scs_cleanup(unsigned int cpu)
```
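Two things stand out in the new alloc/free paths. First, KASAN coverage now spans the cached lifetime of a stack: an allocation is poisoned once its magic word and accounting are set up, and unpoisoned again both when it is handed back out of the per-CPU cache and just before `vfree_atomic()`, replacing the old `/* TODO */`. Second, as the new comment in `scs_free()` says, the free path must not sleep, so the cache is maintained with `this_cpu_cmpxchg()` and the fallback is `vfree_atomic()` rather than `vfree()`. The cache-or-fallback shape itself is generic; here is a user-space analogue in C11 atomics (illustrative only, with made-up names like `stack_get`/`stack_put`; the kernel's per-CPU ops are cheaper because each CPU owns its own slots and needs no cross-CPU atomicity):

```c
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-ins; the kernel uses NR_CACHED_SCS and SCS_SIZE. */
#define NR_CACHED       2
#define STACK_SIZE      1024

static _Atomic(void *) cache[NR_CACHED];

static void *stack_get(void)
{
        for (int i = 0; i < NR_CACHED; i++) {
                /* Claim a cached stack by swapping the slot with NULL. */
                void *s = atomic_exchange(&cache[i], NULL);
                if (s) {
                        memset(s, 0, STACK_SIZE);
                        return s;
                }
        }
        return calloc(1, STACK_SIZE);           /* slow path */
}

static void stack_put(void *s)
{
        for (int i = 0; i < NR_CACHED; i++) {
                /* Install into the first empty slot; the cmpxchg fails
                 * if another thread filled it first. */
                void *expected = NULL;
                if (atomic_compare_exchange_strong(&cache[i], &expected, s))
                        return;
        }
        free(s);                                /* cache full */
}
```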
```diff
@@ -110,90 +97,18 @@
 
 void __init scs_init(void)
 {
-        WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
-                scs_cleanup) < 0);
-}
-
-#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
-
-static struct kmem_cache *scs_cache;
-
-static inline void *scs_alloc(int node)
-{
-        void *s;
-
-        s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
-        if (s) {
-                scs_set_magic(s);
-                /*
-                 * Poison the allocation to catch unintentional accesses to
-                 * the shadow stack when KASAN is enabled.
-                 */
-                kasan_poison_object_data(scs_cache, s);
-        }
-
-        return s;
-}
-
-static inline void scs_free(void *s)
-{
-        kasan_unpoison_object_data(scs_cache, s);
-        kmem_cache_free(scs_cache, s);
-}
-
-static struct page *__scs_page(struct task_struct *tsk)
-{
-        return virt_to_page(__scs_base(tsk));
-}
-
-void __init scs_init(void)
-{
-        scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
-                                      0, NULL);
-        WARN_ON(!scs_cache);
-}
-
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
-
-void scs_task_reset(struct task_struct *tsk)
-{
-        /*
-         * Reset the shadow stack to the base address in case the task
-         * is reused.
-         */
-        task_set_scs(tsk, __scs_base(tsk));
-}
-
-static void scs_account(struct task_struct *tsk, int account)
-{
-        mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
-                            account * SCS_SIZE);
+        cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+                          scs_cleanup);
 }
 
 int scs_prepare(struct task_struct *tsk, int node)
 {
-        void *s;
+        void *s = scs_alloc(node);
 
-        s = scs_alloc(node);
         if (!s)
                 return -ENOMEM;
 
-        task_set_scs(tsk, s);
-        scs_account(tsk, 1);
-
+        task_scs(tsk) = task_scs_sp(tsk) = s;
         return 0;
-}
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-static inline unsigned long scs_used(struct task_struct *tsk)
-{
-        unsigned long *p = __scs_base(tsk);
-        unsigned long *end = scs_magic(p);
-        unsigned long s = (unsigned long)p;
-
-        while (p < end && READ_ONCE_NOCHECK(*p))
-                p++;
-
-        return (unsigned long)p - s;
 }
 
```
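With accounting folded into `scs_alloc()`/`scs_free()` and the `!CONFIG_SHADOW_CALL_STACK_VMAP` slab variant gone (every stack is vmalloc-backed now, so `scs_task_reset()` and `__scs_page()` disappear too), `scs_prepare()` shrinks to allocate-and-assign. The chained assignment sets both the base and the current shadow stack pointer to the start of the region because the shadow call stack grows upward (as the deleted comment in the first hunk noted), so an empty stack has its pointer at its base. A minimal C model of the push/pop discipline, purely illustrative (on real hardware the compiler emits the push and pop around each call, e.g. via a reserved register such as x18 on arm64):

```c
/* Model only: scs_sp stands in for task_scs_sp(); not kernel code. */
static unsigned long *scs_sp;

static inline void scs_push(unsigned long retaddr)
{
        *scs_sp++ = retaddr;    /* grows upward from the base */
}

static inline unsigned long scs_pop(void)
{
        return *--scs_sp;
}
```

Because frames are pushed upward over zeroed memory, a stack's high-water mark is simply the last non-zero slot, which is exactly what the reworked `scs_check_usage()` below scans for.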
```diff
@@ -200,47 +115,40 @@
 static void scs_check_usage(struct task_struct *tsk)
 {
-        static DEFINE_SPINLOCK(lock);
         static unsigned long highest;
-        unsigned long used = scs_used(tsk);
 
-        if (used <= highest)
+        unsigned long *p, prev, curr = highest, used = 0;
+
+        if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
                 return;
 
-        spin_lock(&lock);
-
-        if (used > highest) {
-                pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
-                        tsk->comm, task_pid_nr(tsk), used);
-                highest = used;
+        for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
+                if (!READ_ONCE_NOCHECK(*p))
+                        break;
+                used += sizeof(*p);
         }
 
-        spin_unlock(&lock);
-}
-#else
-static inline void scs_check_usage(struct task_struct *tsk)
-{
-}
-#endif
+        while (used > curr) {
+                prev = cmpxchg_relaxed(&highest, curr, used);
 
-bool scs_corrupted(struct task_struct *tsk)
-{
-        unsigned long *magic = scs_magic(__scs_base(tsk));
+                if (prev == curr) {
+                        pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+                                tsk->comm, task_pid_nr(tsk), used);
+                        break;
+                }
 
-        return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+                curr = prev;
+        }
 }
 
 void scs_release(struct task_struct *tsk)
 {
-        void *s;
+        void *s = task_scs(tsk);
 
-        s = __scs_base(tsk);
         if (!s)
                 return;
 
-        WARN_ON(scs_corrupted(tsk));
+        WARN(task_scs_end_corrupted(tsk),
+             "corrupted shadow stack detected when freeing task\n");
         scs_check_usage(tsk);
-
-        scs_account(tsk, -1);
-        task_set_scs(tsk, NULL);
         scs_free(s);
 }
```
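The debug plumbing loses both its `#ifdef` (an `IS_ENABLED()` check compiles the body out instead) and its spinlock: usage is computed by scanning from the base to the first zero slot, and the record is published with a `cmpxchg_relaxed()` loop, so only the winner of each race prints. `scs_corrupted()` is likewise replaced by `task_scs_end_corrupted()`, presumably a header helper that still checks the `SCS_END_MAGIC` canary at the end of the region. The watermark idiom translates directly to C11 (a user-space analogue with an invented `record_usage()` helper, not the kernel code):

```c
#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long highest;

/* Returns true if this call raised the record (and so may report it,
 * as pr_info() does above); cmpxchg_relaxed() maps to a relaxed CAS. */
static bool record_usage(unsigned long used)
{
        unsigned long curr = atomic_load_explicit(&highest,
                                                  memory_order_relaxed);

        while (used > curr) {
                /* On failure, curr is reloaded with the value that beat
                 * us and the loop re-checks whether used still wins. */
                if (atomic_compare_exchange_weak_explicit(&highest, &curr,
                                used, memory_order_relaxed,
                                memory_order_relaxed))
                        return true;
        }
        return false;
}
```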