@@ -8,7 +8,7 @@
  *
  * Lock order:
  *	context.ldt_usr_sem
- *	  mmap_sem
+ *	  mmap_lock
  *	    context.lock
  */
 
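A minimal sketch of what that nesting order means in practice, assuming the surrounding code takes ldt_usr_sem with down_write() on the modify_ldt() write path, takes mmap_lock with mmap_write_lock(), and that context.lock is a mutex (all assumptions about code outside this hunk):

```c
/* Illustrative nesting only; not code from this file. */
down_write(&mm->context.ldt_usr_sem);	/* outermost: serializes LDT updates */
mmap_write_lock(mm);			/* mmap_lock nests inside ldt_usr_sem */
mutex_lock(&mm->context.lock);		/* innermost */

/* ... modify the LDT and its page-table mappings ... */

mutex_unlock(&mm->context.lock);
mmap_write_unlock(mm);
up_write(&mm->context.ldt_usr_sem);
```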
@@ -27,7 +27,91 @@
 #include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
-#include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+#include <xen/xen.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active. The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs. This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT. Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed. There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode. Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
 
 static void refresh_ldt_segments(void)
 {
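To make the slot arithmetic above concrete, here is a small standalone sketch (not part of the patch) that mirrors ldt_slot_va(). LDT_ENTRIES == 8192 and LDT_ENTRY_SIZE == 8 are the usual x86 values, and LDT_BASE_ADDR below is a made-up placeholder rather than the real constant from pgtable_areas.h:

```c
#include <stdio.h>
#include <stdint.h>

#define LDT_ENTRIES	8192			/* usual x86 value (assumed) */
#define LDT_ENTRY_SIZE	8			/* bytes per descriptor */
#define LDT_BASE_ADDR	0xffffff0000000000ULL	/* placeholder, not the real address */
#define LDT_SLOT_STRIDE	((uint64_t)LDT_ENTRIES * LDT_ENTRY_SIZE)

static uint64_t ldt_slot_va(int slot)
{
	return LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot;
}

int main(void)
{
	/* 8192 * 8 = 0x10000 bytes = 64 KiB per slot, i.e. 16 pages of 4 KiB,
	 * so the stride is indeed a multiple of PAGE_SIZE. */
	printf("stride = %#llx\n", (unsigned long long)LDT_SLOT_STRIDE);
	printf("slot 0 = %#llx\n", (unsigned long long)ldt_slot_va(0));
	printf("slot 1 = %#llx\n", (unsigned long long)ldt_slot_va(1));
	return 0;
}
```

Under PTI the LDT allocation itself stays out of the user page tables, and set_ldt() is pointed at one of these fixed aliases instead of ldt->entries, which is what the load_mm_ldt() hunk above does.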
@@ -113,7 +197,7 @@
 		 * tables.
 		 */
 		WARN_ON(!had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(!had_user_mapping);
 	} else {
 		/*
@@ -121,7 +205,7 @@
 		 * Sync the pgd to the usermode tables.
 		 */
 		WARN_ON(had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(had_user_mapping);
 	}
 }
@@ -156,7 +240,7 @@
 	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pmd(u_pmd, *k_pmd);
 }
 
@@ -181,7 +265,7 @@
 {
 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
 }
 
@@ -208,7 +292,7 @@
 	spinlock_t *ptl;
 	int i, nr_pages;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return 0;
 
 	/*
@@ -271,7 +355,7 @@
 		return;
 
 	/* LDT map/unmap is only required for PTI */
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -288,7 +372,7 @@
 	}
 
 	va = (unsigned long)ldt_slot_va(ldt->slot);
-	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, 0);
+	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
 }
 
 #else /* !CONFIG_PAGE_TABLE_ISOLATION */
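The extra arguments appear because flush_tlb_mm_range() gained a stride/freed-tables interface; the prototype below is a sketch of that signature from memory (treat the parameter names as an assumption, since this patch only shows the call site):

```c
/* Assumed prototype, cf. arch/x86/include/asm/tlbflush.h. */
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end, unsigned int stride_shift,
			bool freed_tables);
```

Passing PAGE_SHIFT says the flushed range is mapped with 4 KiB pages, and false indicates that no page tables are being freed here; only the small LDT alias mappings are flushed.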
@@ -311,7 +395,7 @@
 	unsigned long start = LDT_BASE_ADDR;
 	unsigned long end = LDT_END_ADDR;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	tlb_gather_mmu(&tlb, mm, start, end);
@@ -461,6 +545,28 @@
 	return bytecount;
 }
 
+static bool allow_16bit_segments(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_16BIT))
+		return false;
+
+#ifdef CONFIG_XEN_PV
+	/*
+	 * Xen PV does not implement ESPFIX64, which means that 16-bit
+	 * segments will not work correctly. Until either Xen PV implements
+	 * ESPFIX64 and can signal this fact to the guest or unless someone
+	 * provides compelling evidence that allowing broken 16-bit segments
+	 * is worthwhile, disallow 16-bit segments under Xen PV.
+	 */
+	if (xen_pv_domain()) {
+		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
@@ -492,7 +598,7 @@
 		/* The user wants to clear the entry. */
 		memset(&ldt, 0, sizeof(ldt));
 	} else {
-		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
 			error = -EINVAL;
 			goto out;
 		}
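As a usage illustration (not part of the patch), the kind of request that the new seg_32bit check rejects can be produced from userspace with modify_ldt(2). With this change applied, the call below fails with EINVAL when CONFIG_X86_16BIT is off or when running as a Xen PV guest:

```c
/* Hypothetical test program, not from the kernel tree. */
#include <asm/ldt.h>		/* struct user_desc */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number    = 0;
	desc.base_addr       = 0;
	desc.limit           = 0xffff;	/* 64 KiB limit, classic 16-bit layout */
	desc.seg_32bit       = 0;	/* the bit write_ldt() now checks */
	desc.contents        = 0;	/* data segment, expand-up */
	desc.read_exec_only  = 0;
	desc.limit_in_pages  = 0;
	desc.seg_not_present = 0;
	desc.useable         = 0;

	/* modify_ldt has no glibc wrapper; func == 1 writes one LDT entry. */
	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
		perror("modify_ldt");

	return 0;
}
```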
|---|