@@ -8,7 +8,7 @@
  *
  * Lock order:
  *	context.ldt_usr_sem
- *	  mmap_sem
+ *	  mmap_lock
  *	    context.lock
  */
 
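The lock-order comment is the only documentation of this nesting, so a minimal illustrative sketch may help; it is not a call site from this patch and assumes ldt_usr_sem is the rw_semaphore and context.lock the mutex declared in the x86 mm_context_t:

/* Illustrative nesting only -- not code from this file. */
down_write(&mm->context.ldt_usr_sem);	/* outermost: serializes LDT updates */
mmap_write_lock(mm);			/* mm->mmap_lock, the renamed mmap_sem */
mutex_lock(&mm->context.lock);		/* innermost */

/* ... modify the LDT and its page-table mappings ... */

mutex_unlock(&mm->context.lock);
mmap_write_unlock(mm);
up_write(&mm->context.ldt_usr_sem);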
@@ -27,7 +27,91 @@
 #include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
-#include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+#include <xen/xen.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active.  The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs.  This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT.  Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed.  There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode.  Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
 
 static void refresh_ldt_segments(void)
 {
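The READ_ONCE in load_mm_ldt() pairs with a release store on the update side, which is not part of this hunk. As a hedged sketch of that publish path (following the install_ldt()/flush_ldt() helpers this file already has, assumed unchanged by this diff):

/* Sketch of the publish side the READ_ONCE comment refers to. */
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	/* Synchronizes with the READ_ONCE in load_mm_ldt(). */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the new LDT on every CPU currently running this mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
}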
@@ -113,7 +197,7 @@
 		 * tables.
 		 */
 		WARN_ON(!had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(!had_user_mapping);
 	} else {
 		/*
@@ -121,7 +205,7 @@
 		 * Sync the pgd to the usermode tables.
 		 */
 		WARN_ON(had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(had_user_mapping);
 	}
 }
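The static_cpu_has() to boot_cpu_has() conversions in this hunk and the following ones are not functional changes: both test X86_FEATURE_PTI, but static_cpu_has() is patched via alternatives and is meant for hot paths, while the LDT map/unmap code runs rarely. A simplified sketch of the distinction (assumed; the real definitions live in <asm/cpufeature.h>):

/* Simplified sketch -- not the real <asm/cpufeature.h> definitions. */

/* boot_cpu_has(): an ordinary bit test against boot_cpu_data. */
#define boot_cpu_has_sketch(bit)	cpu_has(&boot_cpu_data, bit)

/*
 * static_cpu_has(): compiles to a branch that the alternatives machinery
 * patches at boot, avoiding the bitmap load on every call.  That only pays
 * off in hot paths, which these slow LDT paths are not.
 */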
@@ -156,7 +240,7 @@
 	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pmd(u_pmd, *k_pmd);
 }
 
@@ -181,7 +265,7 @@
 {
 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
 }
 
@@ -208,7 +292,7 @@
 	spinlock_t *ptl;
 	int i, nr_pages;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return 0;
 
 	/*
@@ -271,7 +355,7 @@
 		return;
 
 	/* LDT map/unmap is only required for PTI */
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -288,7 +372,7 @@
 	}
 
 	va = (unsigned long)ldt_slot_va(ldt->slot);
-	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, 0);
+	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
 }
 
 #else /* !CONFIG_PAGE_TABLE_ISOLATION */
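The flush_tlb_mm_range() call above now follows the helper's newer prototype, which takes a stride shift and a freed-tables flag in place of the old flags argument; the LDT alias is flushed with a 4K stride (PAGE_SHIFT) and no freed page tables. For reference, the prototype as it is expected to appear in <asm/tlbflush.h> (hedged, not part of this diff):

/* Expected prototype of the helper used above. */
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			       unsigned long end, unsigned int stride_shift,
			       bool freed_tables);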
@@ -311,7 +395,7 @@
 	unsigned long start = LDT_BASE_ADDR;
 	unsigned long end = LDT_END_ADDR;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	tlb_gather_mmu(&tlb, mm, start, end);
@@ -461,6 +545,28 @@
 	return bytecount;
 }
 
+static bool allow_16bit_segments(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_16BIT))
+		return false;
+
+#ifdef CONFIG_XEN_PV
+	/*
+	 * Xen PV does not implement ESPFIX64, which means that 16-bit
+	 * segments will not work correctly.  Until either Xen PV implements
+	 * ESPFIX64 and can signal this fact to the guest or unless someone
+	 * provides compelling evidence that allowing broken 16-bit segments
+	 * is worthwhile, disallow 16-bit segments under Xen PV.
+	 */
+	if (xen_pv_domain()) {
+		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
@@ -492,7 +598,7 @@
 		/* The user wants to clear the entry. */
 		memset(&ldt, 0, sizeof(ldt));
 	} else {
-		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
 			error = -EINVAL;
 			goto out;
 		}
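From user space, allow_16bit_segments() changes the behavior seen through modify_ldt(2): with CONFIG_X86_16BIT disabled, or in a Xen PV guest, installing a descriptor with seg_32bit == 0 now fails with EINVAL. A small hypothetical probe (not part of the patch) that exercises this path:

/* Hypothetical user-space probe for the 16-bit LDT path (not from the patch). */
#include <asm/ldt.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc d;

	memset(&d, 0, sizeof(d));
	d.entry_number = 0;
	d.limit = 0xffff;
	d.seg_32bit = 0;	/* request a 16-bit data segment */

	/* 0x11: write an LDT entry using the modern (non-"oldmode") format. */
	if (syscall(SYS_modify_ldt, 0x11, &d, sizeof(d)) != 0) {
		perror("modify_ldt");	/* EINVAL when 16-bit segments are disallowed */
		return 1;
	}

	puts("16-bit LDT entry installed");
	return 0;
}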