2024-01-31 f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2
kernel/arch/x86/kernel/ldt.c
@@ -8,7 +8,7 @@
  *
  * Lock order:
  *	contex.ldt_usr_sem
- *	  mmap_sem
+ *	  mmap_lock
  *	    context.lock
  */
 
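
Note on the renamed lock: the documented order means any path that needs both the LDT semaphore and the mm's mmap lock must nest them as above. A minimal sketch of that nesting, using the mmap_write_lock()/mmap_write_unlock() helpers introduced alongside the mmap_sem -> mmap_lock rename (illustrative only, not the exact write_ldt() sequence):

    /* Illustrative nesting only -- not a quote of write_ldt(): */
    down_write(&mm->context.ldt_usr_sem);   /* outermost: serializes LDT updates */
    mmap_write_lock(mm);                    /* then the mm's mmap_lock */
    /* ... modify the LDT mapping under both locks ... */
    mmap_write_unlock(mm);
    up_write(&mm->context.ldt_usr_sem);
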
@@ -27,7 +27,91 @@
 #include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
-#include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+#include <xen/xen.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active. The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs. This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT. Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed. There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode. Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
 
 static void refresh_ldt_segments(void)
 {
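
The PTI path added above depends on two fixed alias slots, so a fresh LDT can be mapped while the old one is still live on other CPUs until the IPI described in the comment retires it. A short sketch of the slot arithmetic under the definitions above; the slot-picking expression mirrors how upstream's map_ldt_struct() caller chooses a slot, but is shown here purely as an illustration:

    /* Each slot covers one whole LDT (LDT_ENTRIES * LDT_ENTRY_SIZE bytes,
     * a multiple of PAGE_SIZE) at a fixed kernel virtual address: */
    void *slot0 = ldt_slot_va(0);   /* LDT_BASE_ADDR                   */
    void *slot1 = ldt_slot_va(1);   /* LDT_BASE_ADDR + LDT_SLOT_STRIDE */

    /* A new LDT goes into whichever slot the current one is NOT using,
     * so stale translations stay usable until the IPI has landed: */
    int new_slot = old_ldt ? !old_ldt->slot : 0;
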
@@ -113,7 +197,7 @@
 		 * tables.
 		 */
 		WARN_ON(!had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(!had_user_mapping);
 	} else {
 		/*
@@ -121,7 +205,7 @@
 		 * Sync the pgd to the usermode tables.
 		 */
 		WARN_ON(had_kernel_mapping);
-		if (static_cpu_has(X86_FEATURE_PTI))
+		if (boot_cpu_has(X86_FEATURE_PTI))
 			WARN_ON(had_user_mapping);
 	}
 }
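
The static_cpu_has() -> boot_cpu_has() substitution here repeats through the rest of the file. static_cpu_has() is the alternatives-patched variant meant for hot paths; these LDT page-table helpers are slow paths, where an ordinary runtime test of the boot CPU's feature bits is sufficient and carries no dependency on alternatives patching having run. Roughly (a simplified sketch; the real macros live in arch/x86/include/asm/cpufeature.h, and sync_ldt_user_mapping() below is a hypothetical stand-in):

    /* Slow path (these LDT helpers): a plain runtime bit test against
     * boot_cpu_data's capability words -- no patching required. */
    if (boot_cpu_has(X86_FEATURE_PTI))
            sync_ldt_user_mapping();        /* hypothetical callee */

    /* Hot paths only: static_cpu_has() is rewritten into a static jump
     * by the alternatives mechanism at boot, which is why it is reserved
     * for code where a single conditional branch actually matters. */
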
@@ -156,7 +240,7 @@
 	k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 	u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pmd(u_pmd, *k_pmd);
 }
 
@@ -181,7 +265,7 @@
 {
 	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
 
-	if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
 }
 
@@ -208,7 +292,7 @@
 	spinlock_t *ptl;
 	int i, nr_pages;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return 0;
 
 	/*
@@ -271,7 +355,7 @@
 		return;
 
 	/* LDT map/unmap is only required for PTI */
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -288,7 +372,7 @@
 	}
 
 	va = (unsigned long)ldt_slot_va(ldt->slot);
-	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, 0);
+	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
 }
 
 #else /* !CONFIG_PAGE_TABLE_ISOLATION */
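
flush_tlb_mm_range() grew two parameters in the interim: the stride of the flushed region expressed as a shift, and a flag indicating whether page tables were freed. PAGE_SHIFT preserves the old 4 KiB-stride behavior, and false is right here because unmap_ldt_struct() clears PTEs without freeing any page tables. For reference, the prototype this call site now matches (from asm/tlbflush.h):

    void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                            unsigned long end, unsigned int stride_shift,
                            bool freed_tables);
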
@@ -311,7 +395,7 @@
 	unsigned long start = LDT_BASE_ADDR;
 	unsigned long end = LDT_END_ADDR;
 
-	if (!static_cpu_has(X86_FEATURE_PTI))
+	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;
 
 	tlb_gather_mmu(&tlb, mm, start, end);
@@ -461,6 +545,28 @@
 	return bytecount;
 }
 
+static bool allow_16bit_segments(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_16BIT))
+		return false;
+
+#ifdef CONFIG_XEN_PV
+	/*
+	 * Xen PV does not implement ESPFIX64, which means that 16-bit
+	 * segments will not work correctly. Until either Xen PV implements
+	 * ESPFIX64 and can signal this fact to the guest or unless someone
+	 * provides compelling evidence that allowing broken 16-bit segments
+	 * is worthwhile, disallow 16-bit segments under Xen PV.
+	 */
+	if (xen_pv_domain()) {
+		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
@@ -492,7 +598,7 @@
 		/* The user wants to clear the entry. */
 		memset(&ldt, 0, sizeof(ldt));
 	} else {
-		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
 			error = -EINVAL;
 			goto out;
 		}
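
Combined with the write_ldt() hunk above, a 16-bit descriptor request now fails with EINVAL whenever allow_16bit_segments() refuses (e.g. in a Xen PV guest, with a one-time warning in the log). A hypothetical userspace probe, sketched only to show the observable behavior; struct user_desc and the func=1 write operation are the standard modify_ldt(2) interface:

    /* Hypothetical probe (not from the patch): asks the kernel for a
     * 16-bit data segment via modify_ldt(2) and reports the result.
     * With this patch, a Xen PV guest is expected to get EINVAL. */
    #include <asm/ldt.h>            /* struct user_desc */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct user_desc d;

            memset(&d, 0, sizeof(d));
            d.entry_number = 0;
            d.limit        = 0xffff;
            d.seg_32bit    = 0;     /* request a 16-bit segment */

            /* func 1 == write one LDT entry */
            if (syscall(SYS_modify_ldt, 1, &d, sizeof(d)) != 0)
                    printf("16-bit LDT entry rejected: %s\n", strerror(errno));
            else
                    printf("16-bit LDT entry installed\n");
            return 0;
    }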