| .. | .. |
|---|
| 13 | 13 | #include <asm/pti.h> |
|---|
| 14 | 14 | #include <asm/processor-flags.h> |
|---|
| 15 | 15 | |
|---|
| 16 | | -/* |
|---|
| 17 | | - * The x86 feature is called PCID (Process Context IDentifier). It is similar |
|---|
| 18 | | - * to what is traditionally called ASID on the RISC processors. |
|---|
| 19 | | - * |
|---|
| 20 | | - * We don't use the traditional ASID implementation, where each process/mm gets |
|---|
| 21 | | - * its own ASID and flush/restart when we run out of ASID space. |
|---|
| 22 | | - * |
|---|
| 23 | | - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's |
|---|
| 24 | | - * that came by on this CPU, allowing cheaper switch_mm between processes on |
|---|
| 25 | | - * this CPU. |
|---|
| 26 | | - * |
|---|
| 27 | | - * We end up with different spaces for different things. To avoid confusion we |
|---|
| 28 | | - * use different names for each of them: |
|---|
| 29 | | - * |
|---|
| 30 | | - * ASID - [0, TLB_NR_DYN_ASIDS-1] |
|---|
| 31 | | - * the canonical identifier for an mm |
|---|
| 32 | | - * |
|---|
| 33 | | - * kPCID - [1, TLB_NR_DYN_ASIDS] |
|---|
| 34 | | - * the value we write into the PCID part of CR3; corresponds to the |
|---|
| 35 | | - * ASID+1, because PCID 0 is special. |
|---|
| 36 | | - * |
|---|
| 37 | | - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] |
|---|
| 38 | | - * for KPTI each mm has two address spaces and thus needs two |
|---|
| 39 | | - * PCID values, but we can still do with a single ASID denomination |
|---|
| 40 | | - * for each mm. Corresponds to kPCID + 2048. |
|---|
| 41 | | - * |
|---|
| 42 | | - */ |
|---|
| 16 | +void __flush_tlb_all(void); |
|---|
| 43 | 17 | |
|---|
| 44 | | -/* There are 12 bits of space for ASIDs in CR3 */ |
|---|
| 45 | | -#define CR3_HW_ASID_BITS 12 |
|---|
| 18 | +#define TLB_FLUSH_ALL -1UL |
|---|
| 46 | 19 | |
|---|
| 47 | | -/* |
|---|
| 48 | | - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for |
|---|
| 49 | | - * user/kernel switches |
|---|
| 50 | | - */ |
|---|
| 51 | | -#ifdef CONFIG_PAGE_TABLE_ISOLATION |
|---|
| 52 | | -# define PTI_CONSUMED_PCID_BITS 1 |
|---|
| 53 | | -#else |
|---|
| 54 | | -# define PTI_CONSUMED_PCID_BITS 0 |
|---|
| 55 | | -#endif |
|---|
| 20 | +void cr4_update_irqsoff(unsigned long set, unsigned long clear); |
|---|
| 21 | +unsigned long cr4_read_shadow(void); |
|---|
| 56 | 22 | |
|---|
| 57 | | -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) |
|---|
| 23 | +/* Set in this cpu's CR4. */ |
|---|
| 24 | +static inline void cr4_set_bits_irqsoff(unsigned long mask) |
|---|
| 25 | +{ |
|---|
| 26 | + cr4_update_irqsoff(mask, 0); |
|---|
| 27 | +} |
|---|
| 58 | 28 | |
|---|
| 59 | | -/* |
|---|
| 60 | | - * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account |
|---|
| 61 | | - * for them being zero-based. Another -1 is because PCID 0 is reserved for |
|---|
| 62 | | - * use by non-PCID-aware users. |
|---|
| 63 | | - */ |
|---|
| 64 | | -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) |
|---|
| 29 | +/* Clear in this cpu's CR4. */ |
|---|
| 30 | +static inline void cr4_clear_bits_irqsoff(unsigned long mask) |
|---|
| 31 | +{ |
|---|
| 32 | + cr4_update_irqsoff(0, mask); |
|---|
| 33 | +} |
|---|
| 65 | 34 | |
|---|
| 35 | +/* Set in this cpu's CR4. */ |
|---|
| 36 | +static inline void cr4_set_bits(unsigned long mask) |
|---|
| 37 | +{ |
|---|
| 38 | + unsigned long flags; |
|---|
| 39 | + |
|---|
| 40 | + local_irq_save(flags); |
|---|
| 41 | + cr4_set_bits_irqsoff(mask); |
|---|
| 42 | + local_irq_restore(flags); |
|---|
| 43 | +} |
|---|
| 44 | + |
|---|
| 45 | +/* Clear in this cpu's CR4. */ |
|---|
| 46 | +static inline void cr4_clear_bits(unsigned long mask) |
|---|
| 47 | +{ |
|---|
| 48 | + unsigned long flags; |
|---|
| 49 | + |
|---|
| 50 | + local_irq_save(flags); |
|---|
| 51 | + cr4_clear_bits_irqsoff(mask); |
|---|
| 52 | + local_irq_restore(flags); |
|---|
| 53 | +} |
|---|
| 54 | + |
|---|
| 55 | +#ifndef MODULE |
|---|
| 66 | 56 | /* |
|---|
| 67 | 57 | * 6 because 6 should be plenty and struct tlb_state will fit in two cache |
|---|
| 68 | 58 | * lines. |
|---|
| 69 | 59 | */ |
|---|
| 70 | 60 | #define TLB_NR_DYN_ASIDS 6 |
|---|
| 71 | | - |
|---|
| 72 | | -/* |
|---|
| 73 | | - * Given @asid, compute kPCID |
|---|
| 74 | | - */ |
|---|
| 75 | | -static inline u16 kern_pcid(u16 asid) |
|---|
| 76 | | -{ |
|---|
| 77 | | - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); |
|---|
| 78 | | - |
|---|
| 79 | | -#ifdef CONFIG_PAGE_TABLE_ISOLATION |
|---|
| 80 | | - /* |
|---|
| 81 | | - * Make sure that the dynamic ASID space does not conflict with the |
|---|
| 82 | | - * bit we are using to switch between user and kernel ASIDs. |
|---|
| 83 | | - */ |
|---|
| 84 | | - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); |
|---|
| 85 | | - |
|---|
| 86 | | - /* |
|---|
| 87 | | - * The ASID being passed in here should have respected the |
|---|
| 88 | | - * MAX_ASID_AVAILABLE and thus never have the switch bit set. |
|---|
| 89 | | - */ |
|---|
| 90 | | - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); |
|---|
| 91 | | -#endif |
|---|
| 92 | | - /* |
|---|
| 93 | | - * The dynamically-assigned ASIDs that get passed in are small |
|---|
| 94 | | - * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, |
|---|
| 95 | | - * so do not bother to clear it. |
|---|
| 96 | | - * |
|---|
| 97 | | - * If PCID is on, ASID-aware code paths put the ASID+1 into the |
|---|
| 98 | | - * PCID bits. This serves two purposes. It prevents a nasty |
|---|
| 99 | | - * situation in which PCID-unaware code saves CR3, loads some other |
|---|
| 100 | | - * value (with PCID == 0), and then restores CR3, thus corrupting |
|---|
| 101 | | - * the TLB for ASID 0 if the saved ASID was nonzero. It also means |
|---|
| 102 | | - * that any bugs involving loading a PCID-enabled CR3 with |
|---|
| 103 | | - * CR4.PCIDE off will trigger deterministically. |
|---|
| 104 | | - */ |
|---|
| 105 | | - return asid + 1; |
|---|
| 106 | | -} |
|---|
| 107 | | - |
|---|
| 108 | | -/* |
|---|
| 109 | | - * Given @asid, compute uPCID |
|---|
| 110 | | - */ |
|---|
| 111 | | -static inline u16 user_pcid(u16 asid) |
|---|
| 112 | | -{ |
|---|
| 113 | | - u16 ret = kern_pcid(asid); |
|---|
| 114 | | -#ifdef CONFIG_PAGE_TABLE_ISOLATION |
|---|
| 115 | | - ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; |
|---|
| 116 | | -#endif |
|---|
| 117 | | - return ret; |
|---|
| 118 | | -} |
|---|
| 119 | | - |
|---|
| 120 | | -struct pgd_t; |
|---|
| 121 | | -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) |
|---|
| 122 | | -{ |
|---|
| 123 | | - if (static_cpu_has(X86_FEATURE_PCID)) { |
|---|
| 124 | | - return __sme_pa(pgd) | kern_pcid(asid); |
|---|
| 125 | | - } else { |
|---|
| 126 | | - VM_WARN_ON_ONCE(asid != 0); |
|---|
| 127 | | - return __sme_pa(pgd); |
|---|
| 128 | | - } |
|---|
| 129 | | -} |
|---|
| 130 | | - |
|---|
| 131 | | -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) |
|---|
| 132 | | -{ |
|---|
| 133 | | - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); |
|---|
| 134 | | - /* |
|---|
| 135 | | - * Use boot_cpu_has() instead of this_cpu_has() as this function |
|---|
| 136 | | - * might be called during early boot. This should work even after |
|---|
| 137 | | - * boot because all CPUs have the same capabilities: |
|---|
| 138 | | - */ |
|---|
| 139 | | - VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); |
|---|
| 140 | | - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; |
|---|
| 141 | | -} |
|---|
| 142 | | - |
|---|
| 143 | | -#ifdef CONFIG_PARAVIRT |
|---|
| 144 | | -#include <asm/paravirt.h> |
|---|
| 145 | | -#else |
|---|
| 146 | | -#define __flush_tlb() __native_flush_tlb() |
|---|
| 147 | | -#define __flush_tlb_global() __native_flush_tlb_global() |
|---|
| 148 | | -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) |
|---|
| 149 | | -#endif |
|---|
| 150 | | - |
|---|
| 151 | | -static inline bool tlb_defer_switch_to_init_mm(void) |
|---|
| 152 | | -{ |
|---|
| 153 | | - /* |
|---|
| 154 | | - * If we have PCID, then switching to init_mm is reasonably |
|---|
| 155 | | - * fast. If we don't have PCID, then switching to init_mm is |
|---|
| 156 | | - * quite slow, so we try to defer it in the hopes that we can |
|---|
| 157 | | - * avoid it entirely. The latter approach runs the risk of |
|---|
| 158 | | - * receiving otherwise unnecessary IPIs. |
|---|
| 159 | | - * |
|---|
| 160 | | - * This choice is just a heuristic. The tlb code can handle this |
|---|
| 161 | | - * function returning true or false regardless of whether we have |
|---|
| 162 | | - * PCID. |
|---|
| 163 | | - */ |
|---|
| 164 | | - return !static_cpu_has(X86_FEATURE_PCID); |
|---|
| 165 | | -} |
|---|
| 166 | 61 | |
|---|
| 167 | 62 | struct tlb_context { |
|---|
| 168 | 63 | u64 ctx_id; |
|---|
| .. | .. |
|---|
| 183 | 78 | */ |
|---|
| 184 | 79 | struct mm_struct *loaded_mm; |
|---|
| 185 | 80 | |
|---|
| 186 | | -#define LOADED_MM_SWITCHING ((struct mm_struct *)1) |
|---|
| 81 | +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) |
|---|
| 187 | 82 | |
|---|
| 188 | 83 | /* Last user mm for optimizing IBPB */ |
|---|
| 189 | 84 | union { |
|---|
| .. | .. |
|---|
| 258 | 153 | }; |
|---|
| 259 | 154 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); |
|---|
| 260 | 155 | |
|---|
| 261 | | -/* |
|---|
| 262 | | - * Blindly accessing user memory from NMI context can be dangerous |
|---|
| 263 | | - * if we're in the middle of switching the current user task or |
|---|
| 264 | | - * switching the loaded mm. It can also be dangerous if we |
|---|
| 265 | | - * interrupted some kernel code that was temporarily using a |
|---|
| 266 | | - * different mm. |
|---|
| 267 | | - */ |
|---|
| 268 | | -static inline bool nmi_uaccess_okay(void) |
|---|
| 269 | | -{ |
|---|
| 270 | | - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); |
|---|
| 271 | | - struct mm_struct *current_mm = current->mm; |
|---|
| 272 | | - |
|---|
| 273 | | - VM_WARN_ON_ONCE(!loaded_mm); |
|---|
| 274 | | - |
|---|
| 275 | | - /* |
|---|
| 276 | | - * The condition we want to check is |
|---|
| 277 | | - * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, |
|---|
| 278 | | - * if we're running in a VM with shadow paging, and nmi_uaccess_okay() |
|---|
| 279 | | - * is supposed to be reasonably fast. |
|---|
| 280 | | - * |
|---|
| 281 | | - * Instead, we check the almost equivalent but somewhat conservative |
|---|
| 282 | | - * condition below, and we rely on the fact that switch_mm_irqs_off() |
|---|
| 283 | | - * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. |
|---|
| 284 | | - */ |
|---|
| 285 | | - if (loaded_mm != current_mm) |
|---|
| 286 | | - return false; |
|---|
| 287 | | - |
|---|
| 288 | | - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); |
|---|
| 289 | | - |
|---|
| 290 | | - return true; |
|---|
| 291 | | -} |
|---|
| 156 | +bool nmi_uaccess_okay(void); |
|---|
| 157 | +#define nmi_uaccess_okay nmi_uaccess_okay |
|---|
| 292 | 158 | |
|---|
| 293 | 159 | /* Initialize cr4 shadow for this CPU. */ |
|---|
| 294 | 160 | static inline void cr4_init_shadow(void) |
|---|
| .. | .. |
|---|
| 296 | 162 | this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); |
|---|
| 297 | 163 | } |
|---|
| 298 | 164 | |
|---|
| 299 | | -static inline void __cr4_set(unsigned long cr4) |
|---|
| 300 | | -{ |
|---|
| 301 | | - lockdep_assert_irqs_disabled(); |
|---|
| 302 | | - this_cpu_write(cpu_tlbstate.cr4, cr4); |
|---|
| 303 | | - __write_cr4(cr4); |
|---|
| 304 | | -} |
|---|
| 305 | | - |
|---|
| 306 | | -/* Set in this cpu's CR4. */ |
|---|
| 307 | | -static inline void cr4_set_bits(unsigned long mask) |
|---|
| 308 | | -{ |
|---|
| 309 | | - unsigned long cr4, flags; |
|---|
| 310 | | - |
|---|
| 311 | | - local_irq_save(flags); |
|---|
| 312 | | - cr4 = this_cpu_read(cpu_tlbstate.cr4); |
|---|
| 313 | | - if ((cr4 | mask) != cr4) |
|---|
| 314 | | - __cr4_set(cr4 | mask); |
|---|
| 315 | | - local_irq_restore(flags); |
|---|
| 316 | | -} |
|---|
| 317 | | - |
|---|
| 318 | | -/* Clear in this cpu's CR4. */ |
|---|
| 319 | | -static inline void cr4_clear_bits(unsigned long mask) |
|---|
| 320 | | -{ |
|---|
| 321 | | - unsigned long cr4, flags; |
|---|
| 322 | | - |
|---|
| 323 | | - local_irq_save(flags); |
|---|
| 324 | | - cr4 = this_cpu_read(cpu_tlbstate.cr4); |
|---|
| 325 | | - if ((cr4 & ~mask) != cr4) |
|---|
| 326 | | - __cr4_set(cr4 & ~mask); |
|---|
| 327 | | - local_irq_restore(flags); |
|---|
| 328 | | -} |
|---|
| 329 | | - |
|---|
| 330 | | -static inline void cr4_toggle_bits_irqsoff(unsigned long mask) |
|---|
| 331 | | -{ |
|---|
| 332 | | - unsigned long cr4; |
|---|
| 333 | | - |
|---|
| 334 | | - cr4 = this_cpu_read(cpu_tlbstate.cr4); |
|---|
| 335 | | - __cr4_set(cr4 ^ mask); |
|---|
| 336 | | -} |
|---|
| 337 | | - |
|---|
| 338 | | -/* Read the CR4 shadow. */ |
|---|
| 339 | | -static inline unsigned long cr4_read_shadow(void) |
|---|
| 340 | | -{ |
|---|
| 341 | | - return this_cpu_read(cpu_tlbstate.cr4); |
|---|
| 342 | | -} |
|---|
| 343 | | - |
|---|
| 344 | | -/* |
|---|
| 345 | | - * Mark all other ASIDs as invalid, preserves the current. |
|---|
| 346 | | - */ |
|---|
| 347 | | -static inline void invalidate_other_asid(void) |
|---|
| 348 | | -{ |
|---|
| 349 | | - this_cpu_write(cpu_tlbstate.invalidate_other, true); |
|---|
| 350 | | -} |
|---|
| 351 | | - |
|---|
| 352 | | -/* |
|---|
| 353 | | - * Save some of the cr4 feature set we're using (e.g. Pentium 4MB |
|---|
| 354 | | - * enable and PPro Global page enable), so that any CPUs that boot |
|---|
| 355 | | - * up after us can get the correct flags. This should only be used |
|---|
| 356 | | - * during boot on the boot cpu. |
|---|
| 357 | | - */ |
|---|
| 358 | 165 | extern unsigned long mmu_cr4_features; |
|---|
| 359 | 166 | extern u32 *trampoline_cr4_features; |
|---|
| 360 | 167 | |
|---|
| 361 | | -static inline void cr4_set_bits_and_update_boot(unsigned long mask) |
|---|
| 362 | | -{ |
|---|
| 363 | | - mmu_cr4_features |= mask; |
|---|
| 364 | | - if (trampoline_cr4_features) |
|---|
| 365 | | - *trampoline_cr4_features = mmu_cr4_features; |
|---|
| 366 | | - cr4_set_bits(mask); |
|---|
| 367 | | -} |
|---|
| 368 | | - |
|---|
| 369 | 168 | extern void initialize_tlbstate_and_flush(void); |
|---|
| 370 | | - |
|---|
| 371 | | -/* |
|---|
| 372 | | - * Given an ASID, flush the corresponding user ASID. We can delay this |
|---|
| 373 | | - * until the next time we switch to it. |
|---|
| 374 | | - * |
|---|
| 375 | | - * See SWITCH_TO_USER_CR3. |
|---|
| 376 | | - */ |
|---|
| 377 | | -static inline void invalidate_user_asid(u16 asid) |
|---|
| 378 | | -{ |
|---|
| 379 | | - /* There is no user ASID if address space separation is off */ |
|---|
| 380 | | - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) |
|---|
| 381 | | - return; |
|---|
| 382 | | - |
|---|
| 383 | | - /* |
|---|
| 384 | | - * We only have a single ASID if PCID is off and the CR3 |
|---|
| 385 | | - * write will have flushed it. |
|---|
| 386 | | - */ |
|---|
| 387 | | - if (!cpu_feature_enabled(X86_FEATURE_PCID)) |
|---|
| 388 | | - return; |
|---|
| 389 | | - |
|---|
| 390 | | - if (!static_cpu_has(X86_FEATURE_PTI)) |
|---|
| 391 | | - return; |
|---|
| 392 | | - |
|---|
| 393 | | - __set_bit(kern_pcid(asid), |
|---|
| 394 | | - (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); |
|---|
| 395 | | -} |
|---|
| 396 | | - |
|---|
| 397 | | -/* |
|---|
| 398 | | - * flush the entire current user mapping |
|---|
| 399 | | - */ |
|---|
| 400 | | -static inline void __native_flush_tlb(void) |
|---|
| 401 | | -{ |
|---|
| 402 | | - /* |
|---|
| 403 | | - * Preemption or interrupts must be disabled to protect the access |
|---|
| 404 | | - * to the per CPU variable and to prevent being preempted between |
|---|
| 405 | | - * read_cr3() and write_cr3(). |
|---|
| 406 | | - */ |
|---|
| 407 | | - WARN_ON_ONCE(preemptible()); |
|---|
| 408 | | - |
|---|
| 409 | | - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); |
|---|
| 410 | | - |
|---|
| 411 | | - /* If current->mm == NULL then the read_cr3() "borrows" an mm */ |
|---|
| 412 | | - native_write_cr3(__native_read_cr3()); |
|---|
| 413 | | -} |
|---|
| 414 | | - |
|---|
| 415 | | -/* |
|---|
| 416 | | - * flush everything |
|---|
| 417 | | - */ |
|---|
| 418 | | -static inline void __native_flush_tlb_global(void) |
|---|
| 419 | | -{ |
|---|
| 420 | | - unsigned long cr4, flags; |
|---|
| 421 | | - |
|---|
| 422 | | - if (static_cpu_has(X86_FEATURE_INVPCID)) { |
|---|
| 423 | | - /* |
|---|
| 424 | | - * Using INVPCID is considerably faster than a pair of writes |
|---|
| 425 | | - * to CR4 sandwiched inside an IRQ flag save/restore. |
|---|
| 426 | | - * |
|---|
| 427 | | - * Note, this works with CR4.PCIDE=0 or 1. |
|---|
| 428 | | - */ |
|---|
| 429 | | - invpcid_flush_all(); |
|---|
| 430 | | - return; |
|---|
| 431 | | - } |
|---|
| 432 | | - |
|---|
| 433 | | - /* |
|---|
| 434 | | - * Read-modify-write to CR4 - protect it from preemption and |
|---|
| 435 | | - * from interrupts. (Use the raw variant because this code can |
|---|
| 436 | | - * be called from deep inside debugging code.) |
|---|
| 437 | | - */ |
|---|
| 438 | | - raw_local_irq_save(flags); |
|---|
| 439 | | - |
|---|
| 440 | | - cr4 = this_cpu_read(cpu_tlbstate.cr4); |
|---|
| 441 | | - /* toggle PGE */ |
|---|
| 442 | | - native_write_cr4(cr4 ^ X86_CR4_PGE); |
|---|
| 443 | | - /* write old PGE again and flush TLBs */ |
|---|
| 444 | | - native_write_cr4(cr4); |
|---|
| 445 | | - |
|---|
| 446 | | - raw_local_irq_restore(flags); |
|---|
| 447 | | -} |
|---|
| 448 | | - |
|---|
| 449 | | -/* |
|---|
| 450 | | - * flush one page in the user mapping |
|---|
| 451 | | - */ |
|---|
| 452 | | -static inline void __native_flush_tlb_one_user(unsigned long addr) |
|---|
| 453 | | -{ |
|---|
| 454 | | - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); |
|---|
| 455 | | - |
|---|
| 456 | | - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); |
|---|
| 457 | | - |
|---|
| 458 | | - if (!static_cpu_has(X86_FEATURE_PTI)) |
|---|
| 459 | | - return; |
|---|
| 460 | | - |
|---|
| 461 | | - /* |
|---|
| 462 | | - * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. |
|---|
| 463 | | - * Just use invalidate_user_asid() in case we are called early. |
|---|
| 464 | | - */ |
|---|
| 465 | | - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) |
|---|
| 466 | | - invalidate_user_asid(loaded_mm_asid); |
|---|
| 467 | | - else |
|---|
| 468 | | - invpcid_flush_one(user_pcid(loaded_mm_asid), addr); |
|---|
| 469 | | -} |
|---|
| 470 | | - |
|---|
| 471 | | -/* |
|---|
| 472 | | - * flush everything |
|---|
| 473 | | - */ |
|---|
| 474 | | -static inline void __flush_tlb_all(void) |
|---|
| 475 | | -{ |
|---|
| 476 | | - /* |
|---|
| 477 | | - * This is to catch users with enabled preemption and the PGE feature |
|---|
| 478 | | - * that would otherwise not trigger the warning in __native_flush_tlb(). |
|---|
| 479 | | - */ |
|---|
| 480 | | - VM_WARN_ON_ONCE(preemptible()); |
|---|
| 481 | | - |
|---|
| 482 | | - if (boot_cpu_has(X86_FEATURE_PGE)) { |
|---|
| 483 | | - __flush_tlb_global(); |
|---|
| 484 | | - } else { |
|---|
| 485 | | - /* |
|---|
| 486 | | - * !PGE -> !PCID (setup_pcid()), thus every flush is total. |
|---|
| 487 | | - */ |
|---|
| 488 | | - __flush_tlb(); |
|---|
| 489 | | - } |
|---|
| 490 | | -} |
|---|
| 491 | | - |
|---|
| 492 | | -/* |
|---|
| 493 | | - * flush one page in the kernel mapping |
|---|
| 494 | | - */ |
|---|
| 495 | | -static inline void __flush_tlb_one_kernel(unsigned long addr) |
|---|
| 496 | | -{ |
|---|
| 497 | | - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); |
|---|
| 498 | | - |
|---|
| 499 | | - /* |
|---|
| 500 | | - * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its |
|---|
| 501 | | - * paravirt equivalent. Even with PCID, this is sufficient: we only |
|---|
| 502 | | - * use PCID if we also use global PTEs for the kernel mapping, and |
|---|
| 503 | | - * INVLPG flushes global translations across all address spaces. |
|---|
| 504 | | - * |
|---|
| 505 | | - * If PTI is on, then the kernel is mapped with non-global PTEs, and |
|---|
| 506 | | - * __flush_tlb_one_user() will flush the given address for the current |
|---|
| 507 | | - * kernel address space and for its usermode counterpart, but it does |
|---|
| 508 | | - * not flush it for other address spaces. |
|---|
| 509 | | - */ |
|---|
| 510 | | - __flush_tlb_one_user(addr); |
|---|
| 511 | | - |
|---|
| 512 | | - if (!static_cpu_has(X86_FEATURE_PTI)) |
|---|
| 513 | | - return; |
|---|
| 514 | | - |
|---|
| 515 | | - /* |
|---|
| 516 | | - * See above. We need to propagate the flush to all other address |
|---|
| 517 | | - * spaces. In principle, we only need to propagate it to kernelmode |
|---|
| 518 | | - * address spaces, but the extra bookkeeping we would need is not |
|---|
| 519 | | - * worth it. |
|---|
| 520 | | - */ |
|---|
| 521 | | - invalidate_other_asid(); |
|---|
| 522 | | -} |
|---|
| 523 | | - |
|---|
| 524 | | -#define TLB_FLUSH_ALL -1UL |
|---|
| 525 | 169 | |
|---|
| 526 | 170 | /* |
|---|
| 527 | 171 | * TLB flushing: |
|---|
| .. | .. |
|---|
| 557 | 201 | unsigned long start; |
|---|
| 558 | 202 | unsigned long end; |
|---|
| 559 | 203 | u64 new_tlb_gen; |
|---|
| 204 | + unsigned int stride_shift; |
|---|
| 205 | + bool freed_tables; |
|---|
| 560 | 206 | }; |
|---|
| 561 | 207 | |
|---|
| 562 | | -#define local_flush_tlb() __flush_tlb() |
|---|
| 208 | +void flush_tlb_local(void); |
|---|
| 209 | +void flush_tlb_one_user(unsigned long addr); |
|---|
| 210 | +void flush_tlb_one_kernel(unsigned long addr); |
|---|
| 211 | +void flush_tlb_others(const struct cpumask *cpumask, |
|---|
| 212 | + const struct flush_tlb_info *info); |
|---|
| 563 | 213 | |
|---|
| 564 | | -#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) |
|---|
| 214 | +#ifdef CONFIG_PARAVIRT |
|---|
| 215 | +#include <asm/paravirt.h> |
|---|
| 216 | +#endif |
|---|
| 565 | 217 | |
|---|
| 566 | | -#define flush_tlb_range(vma, start, end) \ |
|---|
| 567 | | - flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) |
|---|
| 218 | +#define flush_tlb_mm(mm) \ |
|---|
| 219 | + flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) |
|---|
| 220 | + |
|---|
| 221 | +#define flush_tlb_range(vma, start, end) \ |
|---|
| 222 | + flush_tlb_mm_range((vma)->vm_mm, start, end, \ |
|---|
| 223 | + ((vma)->vm_flags & VM_HUGETLB) \ |
|---|
| 224 | + ? huge_page_shift(hstate_vma(vma)) \ |
|---|
| 225 | + : PAGE_SHIFT, false) |
|---|
| 568 | 226 | |
|---|
| 569 | 227 | extern void flush_tlb_all(void); |
|---|
| 570 | 228 | extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, |
|---|
| 571 | | - unsigned long end, unsigned long vmflag); |
|---|
| 229 | + unsigned long end, unsigned int stride_shift, |
|---|
| 230 | + bool freed_tables); |
|---|
| 572 | 231 | extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); |
|---|
| 573 | 232 | |
|---|
| 574 | 233 | static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) |
|---|
| 575 | 234 | { |
|---|
| 576 | | - flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); |
|---|
| 235 | + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); |
|---|
| 577 | 236 | } |
|---|
| 578 | | - |
|---|
| 579 | | -void native_flush_tlb_others(const struct cpumask *cpumask, |
|---|
| 580 | | - const struct flush_tlb_info *info); |
|---|
| 581 | 237 | |
|---|
| 582 | 238 | static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) |
|---|
| 583 | 239 | { |
|---|
| .. | .. |
|---|
| 599 | 255 | |
|---|
| 600 | 256 | extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); |
|---|
| 601 | 257 | |
|---|
| 602 | | -#ifndef CONFIG_PARAVIRT |
|---|
| 603 | | -#define flush_tlb_others(mask, info) \ |
|---|
| 604 | | - native_flush_tlb_others(mask, info) |
|---|
| 605 | | - |
|---|
| 606 | | -#define paravirt_tlb_remove_table(tlb, page) \ |
|---|
| 607 | | - tlb_remove_page(tlb, (void *)(page)) |
|---|
| 608 | | -#endif |
|---|
| 258 | +#endif /* !MODULE */ |
|---|
| 609 | 259 | |
|---|
| 610 | 260 | #endif /* _ASM_X86_TLBFLUSH_H */ |
|---|