| .. | .. | 
|---|
| 13 | 13 |  #include <asm/pti.h> | 
|---|
| 14 | 14 |  #include <asm/processor-flags.h> | 
|---|
| 15 | 15 |   | 
|---|
| 16 |  | -/*  | 
|---|
| 17 |  | - * The x86 feature is called PCID (Process Context IDentifier). It is similar  | 
|---|
| 18 |  | - * to what is traditionally called ASID on the RISC processors.  | 
|---|
| 19 |  | - *  | 
|---|
| 20 |  | - * We don't use the traditional ASID implementation, where each process/mm gets  | 
|---|
| 21 |  | - * its own ASID and flush/restart when we run out of ASID space.  | 
|---|
| 22 |  | - *  | 
|---|
| 23 |  | - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's  | 
|---|
| 24 |  | - * that came by on this CPU, allowing cheaper switch_mm between processes on  | 
|---|
| 25 |  | - * this CPU.  | 
|---|
| 26 |  | - *  | 
|---|
| 27 |  | - * We end up with different spaces for different things. To avoid confusion we  | 
|---|
| 28 |  | - * use different names for each of them:  | 
|---|
| 29 |  | - *  | 
|---|
| 30 |  | - * ASID  - [0, TLB_NR_DYN_ASIDS-1]  | 
|---|
| 31 |  | - *         the canonical identifier for an mm  | 
|---|
| 32 |  | - *  | 
|---|
| 33 |  | - * kPCID - [1, TLB_NR_DYN_ASIDS]  | 
|---|
| 34 |  | - *         the value we write into the PCID part of CR3; corresponds to the  | 
|---|
| 35 |  | - *         ASID+1, because PCID 0 is special.  | 
|---|
| 36 |  | - *  | 
|---|
| 37 |  | - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]  | 
|---|
| 38 |  | - *         for KPTI each mm has two address spaces and thus needs two  | 
|---|
| 39 |  | - *         PCID values, but we can still do with a single ASID denomination  | 
|---|
| 40 |  | - *         for each mm. Corresponds to kPCID + 2048.  | 
|---|
| 41 |  | - *  | 
|---|
| 42 |  | - */  | 
|---|
 | 16 | +void __flush_tlb_all(void);  | 
|---|
| 43 | 17 |   | 
|---|
| 44 |  | -/* There are 12 bits of space for ASIDS in CR3 */  | 
|---|
| 45 |  | -#define CR3_HW_ASID_BITS		12  | 
|---|
 | 18 | +#define TLB_FLUSH_ALL	-1UL  | 
|---|
| 46 | 19 |   | 
|---|
| 47 |  | -/*  | 
|---|
| 48 |  | - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for  | 
|---|
| 49 |  | - * user/kernel switches  | 
|---|
| 50 |  | - */  | 
|---|
| 51 |  | -#ifdef CONFIG_PAGE_TABLE_ISOLATION  | 
|---|
| 52 |  | -# define PTI_CONSUMED_PCID_BITS	1  | 
|---|
| 53 |  | -#else  | 
|---|
| 54 |  | -# define PTI_CONSUMED_PCID_BITS	0  | 
|---|
| 55 |  | -#endif  | 
|---|
 | 20 | +void cr4_update_irqsoff(unsigned long set, unsigned long clear);  | 
|---|
 | 21 | +unsigned long cr4_read_shadow(void);  | 
|---|
| 56 | 22 |   | 
|---|
| 57 |  | -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)  | 
|---|
 | 23 | +/* Set in this cpu's CR4. */  | 
|---|
 | 24 | +static inline void cr4_set_bits_irqsoff(unsigned long mask)  | 
|---|
 | 25 | +{  | 
|---|
 | 26 | +	cr4_update_irqsoff(mask, 0);  | 
|---|
 | 27 | +}  | 
|---|
| 58 | 28 |   | 
|---|
| 59 |  | -/*  | 
|---|
| 60 |  | - * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid.  -1 below to account  | 
|---|
| 61 |  | - * for them being zero-based.  Another -1 is because PCID 0 is reserved for  | 
|---|
| 62 |  | - * use by non-PCID-aware users.  | 
|---|
| 63 |  | - */  | 
|---|
| 64 |  | -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)  | 
|---|
 | 29 | +/* Clear in this cpu's CR4. */  | 
|---|
 | 30 | +static inline void cr4_clear_bits_irqsoff(unsigned long mask)  | 
|---|
 | 31 | +{  | 
|---|
 | 32 | +	cr4_update_irqsoff(0, mask);  | 
|---|
 | 33 | +}  | 
|---|
| 65 | 34 |   | 
|---|
 | 35 | +/* Set in this cpu's CR4. */  | 
|---|
 | 36 | +static inline void cr4_set_bits(unsigned long mask)  | 
|---|
 | 37 | +{  | 
|---|
 | 38 | +	unsigned long flags;  | 
|---|
 | 39 | +  | 
|---|
 | 40 | +	local_irq_save(flags);  | 
|---|
 | 41 | +	cr4_set_bits_irqsoff(mask);  | 
|---|
 | 42 | +	local_irq_restore(flags);  | 
|---|
 | 43 | +}  | 
|---|
 | 44 | +  | 
|---|
 | 45 | +/* Clear in this cpu's CR4. */  | 
|---|
 | 46 | +static inline void cr4_clear_bits(unsigned long mask)  | 
|---|
 | 47 | +{  | 
|---|
 | 48 | +	unsigned long flags;  | 
|---|
 | 49 | +  | 
|---|
 | 50 | +	local_irq_save(flags);  | 
|---|
 | 51 | +	cr4_clear_bits_irqsoff(mask);  | 
|---|
 | 52 | +	local_irq_restore(flags);  | 
|---|
 | 53 | +}  | 
|---|
 | 54 | +  | 
|---|
 | 55 | +#ifndef MODULE  | 
|---|
| 66 | 56 |  /* | 
|---|
| 67 | 57 |   * 6 because 6 should be plenty and struct tlb_state will fit in two cache | 
|---|
| 68 | 58 |   * lines. | 
|---|
| 69 | 59 |   */ | 
|---|
| 70 | 60 |  #define TLB_NR_DYN_ASIDS	6 | 
|---|
| 71 |  | -  | 
|---|
| 72 |  | -/*  | 
|---|
| 73 |  | - * Given @asid, compute kPCID  | 
|---|
| 74 |  | - */  | 
|---|
| 75 |  | -static inline u16 kern_pcid(u16 asid)  | 
|---|
| 76 |  | -{  | 
|---|
| 77 |  | -	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);  | 
|---|
| 78 |  | -  | 
|---|
| 79 |  | -#ifdef CONFIG_PAGE_TABLE_ISOLATION  | 
|---|
| 80 |  | -	/*  | 
|---|
| 81 |  | -	 * Make sure that the dynamic ASID space does not conflict with the  | 
|---|
| 82 |  | -	 * bit we are using to switch between user and kernel ASIDs.  | 
|---|
| 83 |  | -	 */  | 
|---|
| 84 |  | -	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));  | 
|---|
| 85 |  | -  | 
|---|
| 86 |  | -	/*  | 
|---|
| 87 |  | -	 * The ASID being passed in here should have respected the  | 
|---|
| 88 |  | -	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.  | 
|---|
| 89 |  | -	 */  | 
|---|
| 90 |  | -	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));  | 
|---|
| 91 |  | -#endif  | 
|---|
| 92 |  | -	/*  | 
|---|
| 93 |  | -	 * The dynamically-assigned ASIDs that get passed in are small  | 
|---|
| 94 |  | -	 * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,  | 
|---|
| 95 |  | -	 * so do not bother to clear it.  | 
|---|
| 96 |  | -	 *  | 
|---|
| 97 |  | -	 * If PCID is on, ASID-aware code paths put the ASID+1 into the  | 
|---|
| 98 |  | -	 * PCID bits.  This serves two purposes.  It prevents a nasty  | 
|---|
| 99 |  | -	 * situation in which PCID-unaware code saves CR3, loads some other  | 
|---|
| 100 |  | -	 * value (with PCID == 0), and then restores CR3, thus corrupting  | 
|---|
| 101 |  | -	 * the TLB for ASID 0 if the saved ASID was nonzero.  It also means  | 
|---|
| 102 |  | -	 * that any bugs involving loading a PCID-enabled CR3 with  | 
|---|
| 103 |  | -	 * CR4.PCIDE off will trigger deterministically.  | 
|---|
| 104 |  | -	 */  | 
|---|
| 105 |  | -	return asid + 1;  | 
|---|
| 106 |  | -}  | 
|---|
| 107 |  | -  | 
|---|
| 108 |  | -/*  | 
|---|
| 109 |  | - * Given @asid, compute uPCID  | 
|---|
| 110 |  | - */  | 
|---|
| 111 |  | -static inline u16 user_pcid(u16 asid)  | 
|---|
| 112 |  | -{  | 
|---|
| 113 |  | -	u16 ret = kern_pcid(asid);  | 
|---|
| 114 |  | -#ifdef CONFIG_PAGE_TABLE_ISOLATION  | 
|---|
| 115 |  | -	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;  | 
|---|
| 116 |  | -#endif  | 
|---|
| 117 |  | -	return ret;  | 
|---|
| 118 |  | -}  | 
|---|
| 119 |  | -  | 
|---|
| 120 |  | -struct pgd_t;  | 
|---|
| 121 |  | -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)  | 
|---|
| 122 |  | -{  | 
|---|
| 123 |  | -	if (static_cpu_has(X86_FEATURE_PCID)) {  | 
|---|
| 124 |  | -		return __sme_pa(pgd) | kern_pcid(asid);  | 
|---|
| 125 |  | -	} else {  | 
|---|
| 126 |  | -		VM_WARN_ON_ONCE(asid != 0);  | 
|---|
| 127 |  | -		return __sme_pa(pgd);  | 
|---|
| 128 |  | -	}  | 
|---|
| 129 |  | -}  | 
|---|
| 130 |  | -  | 
|---|
| 131 |  | -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)  | 
|---|
| 132 |  | -{  | 
|---|
| 133 |  | -	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);  | 
|---|
| 134 |  | -	/*  | 
|---|
| 135 |  | -	 * Use boot_cpu_has() instead of this_cpu_has() as this function  | 
|---|
| 136 |  | -	 * might be called during early boot. This should work even after  | 
|---|
| 137 |  | -	 * boot because all CPUs have the same capabilities:  | 
|---|
| 138 |  | -	 */  | 
|---|
| 139 |  | -	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));  | 
|---|
| 140 |  | -	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;  | 
|---|
| 141 |  | -}  | 
|---|
| 142 |  | -  | 
|---|
| 143 |  | -#ifdef CONFIG_PARAVIRT  | 
|---|
| 144 |  | -#include <asm/paravirt.h>  | 
|---|
| 145 |  | -#else  | 
|---|
| 146 |  | -#define __flush_tlb() __native_flush_tlb()  | 
|---|
| 147 |  | -#define __flush_tlb_global() __native_flush_tlb_global()  | 
|---|
| 148 |  | -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)  | 
|---|
| 149 |  | -#endif  | 
|---|
| 150 |  | -  | 
|---|
| 151 |  | -static inline bool tlb_defer_switch_to_init_mm(void)  | 
|---|
| 152 |  | -{  | 
|---|
| 153 |  | -	/*  | 
|---|
| 154 |  | -	 * If we have PCID, then switching to init_mm is reasonably  | 
|---|
| 155 |  | -	 * fast.  If we don't have PCID, then switching to init_mm is  | 
|---|
| 156 |  | -	 * quite slow, so we try to defer it in the hopes that we can  | 
|---|
| 157 |  | -	 * avoid it entirely.  The latter approach runs the risk of  | 
|---|
| 158 |  | -	 * receiving otherwise unnecessary IPIs.  | 
|---|
| 159 |  | -	 *  | 
|---|
| 160 |  | -	 * This choice is just a heuristic.  The tlb code can handle this  | 
|---|
| 161 |  | -	 * function returning true or false regardless of whether we have  | 
|---|
| 162 |  | -	 * PCID.  | 
|---|
| 163 |  | -	 */  | 
|---|
| 164 |  | -	return !static_cpu_has(X86_FEATURE_PCID);  | 
|---|
| 165 |  | -}  | 
|---|
| 166 | 61 |   | 
|---|
| 167 | 62 |  struct tlb_context { | 
|---|
| 168 | 63 |  	u64 ctx_id; | 
|---|
| .. | .. | 
|---|
| 183 | 78 |  	 */ | 
|---|
| 184 | 79 |  	struct mm_struct *loaded_mm; | 
|---|
| 185 | 80 |   | 
|---|
| 186 |  | -#define LOADED_MM_SWITCHING ((struct mm_struct *)1)  | 
|---|
 | 81 | +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)  | 
|---|
| 187 | 82 |   | 
|---|
| 188 | 83 |  	/* Last user mm for optimizing IBPB */ | 
|---|
| 189 | 84 |  	union { | 
|---|
| .. | .. | 
|---|
| 258 | 153 |  }; | 
|---|
| 259 | 154 |  DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); | 
|---|
| 260 | 155 |   | 
|---|
| 261 |  | -/*  | 
|---|
| 262 |  | - * Blindly accessing user memory from NMI context can be dangerous  | 
|---|
| 263 |  | - * if we're in the middle of switching the current user task or  | 
|---|
| 264 |  | - * switching the loaded mm.  It can also be dangerous if we  | 
|---|
| 265 |  | - * interrupted some kernel code that was temporarily using a  | 
|---|
| 266 |  | - * different mm.  | 
|---|
| 267 |  | - */  | 
|---|
| 268 |  | -static inline bool nmi_uaccess_okay(void)  | 
|---|
| 269 |  | -{  | 
|---|
| 270 |  | -	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);  | 
|---|
| 271 |  | -	struct mm_struct *current_mm = current->mm;  | 
|---|
| 272 |  | -  | 
|---|
| 273 |  | -	VM_WARN_ON_ONCE(!loaded_mm);  | 
|---|
| 274 |  | -  | 
|---|
| 275 |  | -	/*  | 
|---|
| 276 |  | -	 * The condition we want to check is  | 
|---|
| 277 |  | -	 * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,  | 
|---|
| 278 |  | -	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()  | 
|---|
| 279 |  | -	 * is supposed to be reasonably fast.  | 
|---|
| 280 |  | -	 *  | 
|---|
| 281 |  | -	 * Instead, we check the almost equivalent but somewhat conservative  | 
|---|
| 282 |  | -	 * condition below, and we rely on the fact that switch_mm_irqs_off()  | 
|---|
| 283 |  | -	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.  | 
|---|
| 284 |  | -	 */  | 
|---|
| 285 |  | -	if (loaded_mm != current_mm)  | 
|---|
| 286 |  | -		return false;  | 
|---|
| 287 |  | -  | 
|---|
| 288 |  | -	VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));  | 
|---|
| 289 |  | -  | 
|---|
| 290 |  | -	return true;  | 
|---|
| 291 |  | -}  | 
|---|
 | 156 | +bool nmi_uaccess_okay(void);  | 
|---|
 | 157 | +#define nmi_uaccess_okay nmi_uaccess_okay  | 
|---|
| 292 | 158 |   | 
|---|
| 293 | 159 |  /* Initialize cr4 shadow for this CPU. */ | 
|---|
| 294 | 160 |  static inline void cr4_init_shadow(void) | 
|---|
| .. | .. | 
|---|
| 296 | 162 |  	this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); | 
|---|
| 297 | 163 |  } | 
|---|
| 298 | 164 |   | 
|---|
| 299 |  | -static inline void __cr4_set(unsigned long cr4)  | 
|---|
| 300 |  | -{  | 
|---|
| 301 |  | -	lockdep_assert_irqs_disabled();  | 
|---|
| 302 |  | -	this_cpu_write(cpu_tlbstate.cr4, cr4);  | 
|---|
| 303 |  | -	__write_cr4(cr4);  | 
|---|
| 304 |  | -}  | 
|---|
| 305 |  | -  | 
|---|
| 306 |  | -/* Set in this cpu's CR4. */  | 
|---|
| 307 |  | -static inline void cr4_set_bits(unsigned long mask)  | 
|---|
| 308 |  | -{  | 
|---|
| 309 |  | -	unsigned long cr4, flags;  | 
|---|
| 310 |  | -  | 
|---|
| 311 |  | -	local_irq_save(flags);  | 
|---|
| 312 |  | -	cr4 = this_cpu_read(cpu_tlbstate.cr4);  | 
|---|
| 313 |  | -	if ((cr4 | mask) != cr4)  | 
|---|
| 314 |  | -		__cr4_set(cr4 | mask);  | 
|---|
| 315 |  | -	local_irq_restore(flags);  | 
|---|
| 316 |  | -}  | 
|---|
| 317 |  | -  | 
|---|
| 318 |  | -/* Clear in this cpu's CR4. */  | 
|---|
| 319 |  | -static inline void cr4_clear_bits(unsigned long mask)  | 
|---|
| 320 |  | -{  | 
|---|
| 321 |  | -	unsigned long cr4, flags;  | 
|---|
| 322 |  | -  | 
|---|
| 323 |  | -	local_irq_save(flags);  | 
|---|
| 324 |  | -	cr4 = this_cpu_read(cpu_tlbstate.cr4);  | 
|---|
| 325 |  | -	if ((cr4 & ~mask) != cr4)  | 
|---|
| 326 |  | -		__cr4_set(cr4 & ~mask);  | 
|---|
| 327 |  | -	local_irq_restore(flags);  | 
|---|
| 328 |  | -}  | 
|---|
| 329 |  | -  | 
|---|
| 330 |  | -static inline void cr4_toggle_bits_irqsoff(unsigned long mask)  | 
|---|
| 331 |  | -{  | 
|---|
| 332 |  | -	unsigned long cr4;  | 
|---|
| 333 |  | -  | 
|---|
| 334 |  | -	cr4 = this_cpu_read(cpu_tlbstate.cr4);  | 
|---|
| 335 |  | -	__cr4_set(cr4 ^ mask);  | 
|---|
| 336 |  | -}  | 
|---|
| 337 |  | -  | 
|---|
| 338 |  | -/* Read the CR4 shadow. */  | 
|---|
| 339 |  | -static inline unsigned long cr4_read_shadow(void)  | 
|---|
| 340 |  | -{  | 
|---|
| 341 |  | -	return this_cpu_read(cpu_tlbstate.cr4);  | 
|---|
| 342 |  | -}  | 
|---|
| 343 |  | -  | 
|---|
| 344 |  | -/*  | 
|---|
| 345 |  | - * Mark all other ASIDs as invalid, preserves the current.  | 
|---|
| 346 |  | - */  | 
|---|
| 347 |  | -static inline void invalidate_other_asid(void)  | 
|---|
| 348 |  | -{  | 
|---|
| 349 |  | -	this_cpu_write(cpu_tlbstate.invalidate_other, true);  | 
|---|
| 350 |  | -}  | 
|---|
| 351 |  | -  | 
|---|
| 352 |  | -/*  | 
|---|
| 353 |  | - * Save some of cr4 feature set we're using (e.g.  Pentium 4MB  | 
|---|
| 354 |  | - * enable and PPro Global page enable), so that any CPUs that boot  | 
|---|
| 355 |  | - * up after us can get the correct flags.  This should only be used  | 
|---|
| 356 |  | - * during boot on the boot cpu.  | 
|---|
| 357 |  | - */  | 
|---|
| 358 | 165 |  extern unsigned long mmu_cr4_features; | 
|---|
| 359 | 166 |  extern u32 *trampoline_cr4_features; | 
|---|
| 360 | 167 |   | 
|---|
| 361 |  | -static inline void cr4_set_bits_and_update_boot(unsigned long mask)  | 
|---|
| 362 |  | -{  | 
|---|
| 363 |  | -	mmu_cr4_features |= mask;  | 
|---|
| 364 |  | -	if (trampoline_cr4_features)  | 
|---|
| 365 |  | -		*trampoline_cr4_features = mmu_cr4_features;  | 
|---|
| 366 |  | -	cr4_set_bits(mask);  | 
|---|
| 367 |  | -}  | 
|---|
| 368 |  | -  | 
|---|
| 369 | 168 |  extern void initialize_tlbstate_and_flush(void); | 
|---|
| 370 |  | -  | 
|---|
| 371 |  | -/*  | 
|---|
| 372 |  | - * Given an ASID, flush the corresponding user ASID.  We can delay this  | 
|---|
| 373 |  | - * until the next time we switch to it.  | 
|---|
| 374 |  | - *  | 
|---|
| 375 |  | - * See SWITCH_TO_USER_CR3.  | 
|---|
| 376 |  | - */  | 
|---|
| 377 |  | -static inline void invalidate_user_asid(u16 asid)  | 
|---|
| 378 |  | -{  | 
|---|
| 379 |  | -	/* There is no user ASID if address space separation is off */  | 
|---|
| 380 |  | -	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))  | 
|---|
| 381 |  | -		return;  | 
|---|
| 382 |  | -  | 
|---|
| 383 |  | -	/*  | 
|---|
| 384 |  | -	 * We only have a single ASID if PCID is off and the CR3  | 
|---|
| 385 |  | -	 * write will have flushed it.  | 
|---|
| 386 |  | -	 */  | 
|---|
| 387 |  | -	if (!cpu_feature_enabled(X86_FEATURE_PCID))  | 
|---|
| 388 |  | -		return;  | 
|---|
| 389 |  | -  | 
|---|
| 390 |  | -	if (!static_cpu_has(X86_FEATURE_PTI))  | 
|---|
| 391 |  | -		return;  | 
|---|
| 392 |  | -  | 
|---|
| 393 |  | -	__set_bit(kern_pcid(asid),  | 
|---|
| 394 |  | -		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));  | 
|---|
| 395 |  | -}  | 
|---|
| 396 |  | -  | 
|---|
| 397 |  | -/*  | 
|---|
| 398 |  | - * flush the entire current user mapping  | 
|---|
| 399 |  | - */  | 
|---|
| 400 |  | -static inline void __native_flush_tlb(void)  | 
|---|
| 401 |  | -{  | 
|---|
| 402 |  | -	/*  | 
|---|
| 403 |  | -	 * Preemption or interrupts must be disabled to protect the access  | 
|---|
| 404 |  | -	 * to the per CPU variable and to prevent being preempted between  | 
|---|
| 405 |  | -	 * read_cr3() and write_cr3().  | 
|---|
| 406 |  | -	 */  | 
|---|
| 407 |  | -	WARN_ON_ONCE(preemptible());  | 
|---|
| 408 |  | -  | 
|---|
| 409 |  | -	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));  | 
|---|
| 410 |  | -  | 
|---|
| 411 |  | -	/* If current->mm == NULL then the read_cr3() "borrows" an mm */  | 
|---|
| 412 |  | -	native_write_cr3(__native_read_cr3());  | 
|---|
| 413 |  | -}  | 
|---|
| 414 |  | -  | 
|---|
| 415 |  | -/*  | 
|---|
| 416 |  | - * flush everything  | 
|---|
| 417 |  | - */  | 
|---|
| 418 |  | -static inline void __native_flush_tlb_global(void)  | 
|---|
| 419 |  | -{  | 
|---|
| 420 |  | -	unsigned long cr4, flags;  | 
|---|
| 421 |  | -  | 
|---|
| 422 |  | -	if (static_cpu_has(X86_FEATURE_INVPCID)) {  | 
|---|
| 423 |  | -		/*  | 
|---|
| 424 |  | -		 * Using INVPCID is considerably faster than a pair of writes  | 
|---|
| 425 |  | -		 * to CR4 sandwiched inside an IRQ flag save/restore.  | 
|---|
| 426 |  | -		 *  | 
|---|
| 427 |  | -		 * Note, this works with CR4.PCIDE=0 or 1.  | 
|---|
| 428 |  | -		 */  | 
|---|
| 429 |  | -		invpcid_flush_all();  | 
|---|
| 430 |  | -		return;  | 
|---|
| 431 |  | -	}  | 
|---|
| 432 |  | -  | 
|---|
| 433 |  | -	/*  | 
|---|
| 434 |  | -	 * Read-modify-write to CR4 - protect it from preemption and  | 
|---|
| 435 |  | -	 * from interrupts. (Use the raw variant because this code can  | 
|---|
| 436 |  | -	 * be called from deep inside debugging code.)  | 
|---|
| 437 |  | -	 */  | 
|---|
| 438 |  | -	raw_local_irq_save(flags);  | 
|---|
| 439 |  | -  | 
|---|
| 440 |  | -	cr4 = this_cpu_read(cpu_tlbstate.cr4);  | 
|---|
| 441 |  | -	/* toggle PGE */  | 
|---|
| 442 |  | -	native_write_cr4(cr4 ^ X86_CR4_PGE);  | 
|---|
| 443 |  | -	/* write old PGE again and flush TLBs */  | 
|---|
| 444 |  | -	native_write_cr4(cr4);  | 
|---|
| 445 |  | -  | 
|---|
| 446 |  | -	raw_local_irq_restore(flags);  | 
|---|
| 447 |  | -}  | 
|---|
| 448 |  | -  | 
|---|
| 449 |  | -/*  | 
|---|
| 450 |  | - * flush one page in the user mapping  | 
|---|
| 451 |  | - */  | 
|---|
| 452 |  | -static inline void __native_flush_tlb_one_user(unsigned long addr)  | 
|---|
| 453 |  | -{  | 
|---|
| 454 |  | -	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);  | 
|---|
| 455 |  | -  | 
|---|
| 456 |  | -	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");  | 
|---|
| 457 |  | -  | 
|---|
| 458 |  | -	if (!static_cpu_has(X86_FEATURE_PTI))  | 
|---|
| 459 |  | -		return;  | 
|---|
| 460 |  | -  | 
|---|
| 461 |  | -	/*  | 
|---|
| 462 |  | -	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.  | 
|---|
| 463 |  | -	 * Just use invalidate_user_asid() in case we are called early.  | 
|---|
| 464 |  | -	 */  | 
|---|
| 465 |  | -	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))  | 
|---|
| 466 |  | -		invalidate_user_asid(loaded_mm_asid);  | 
|---|
| 467 |  | -	else  | 
|---|
| 468 |  | -		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);  | 
|---|
| 469 |  | -}  | 
|---|
| 470 |  | -  | 
|---|
| 471 |  | -/*  | 
|---|
| 472 |  | - * flush everything  | 
|---|
| 473 |  | - */  | 
|---|
| 474 |  | -static inline void __flush_tlb_all(void)  | 
|---|
| 475 |  | -{  | 
|---|
| 476 |  | -	/*  | 
|---|
| 477 |  | -	 * This is to catch users with enabled preemption and the PGE feature  | 
|---|
| 478 |  | -	 * and don't trigger the warning in __native_flush_tlb().  | 
|---|
| 479 |  | -	 */  | 
|---|
| 480 |  | -	VM_WARN_ON_ONCE(preemptible());  | 
|---|
| 481 |  | -  | 
|---|
| 482 |  | -	if (boot_cpu_has(X86_FEATURE_PGE)) {  | 
|---|
| 483 |  | -		__flush_tlb_global();  | 
|---|
| 484 |  | -	} else {  | 
|---|
| 485 |  | -		/*  | 
|---|
| 486 |  | -		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.  | 
|---|
| 487 |  | -		 */  | 
|---|
| 488 |  | -		__flush_tlb();  | 
|---|
| 489 |  | -	}  | 
|---|
| 490 |  | -}  | 
|---|
| 491 |  | -  | 
|---|
| 492 |  | -/*  | 
|---|
| 493 |  | - * flush one page in the kernel mapping  | 
|---|
| 494 |  | - */  | 
|---|
| 495 |  | -static inline void __flush_tlb_one_kernel(unsigned long addr)  | 
|---|
| 496 |  | -{  | 
|---|
| 497 |  | -	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);  | 
|---|
| 498 |  | -  | 
|---|
| 499 |  | -	/*  | 
|---|
| 500 |  | -	 * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its  | 
|---|
| 501 |  | -	 * paravirt equivalent.  Even with PCID, this is sufficient: we only  | 
|---|
| 502 |  | -	 * use PCID if we also use global PTEs for the kernel mapping, and  | 
|---|
| 503 |  | -	 * INVLPG flushes global translations across all address spaces.  | 
|---|
| 504 |  | -	 *  | 
|---|
| 505 |  | -	 * If PTI is on, then the kernel is mapped with non-global PTEs, and  | 
|---|
| 506 |  | -	 * __flush_tlb_one_user() will flush the given address for the current  | 
|---|
| 507 |  | -	 * kernel address space and for its usermode counterpart, but it does  | 
|---|
| 508 |  | -	 * not flush it for other address spaces.  | 
|---|
| 509 |  | -	 */  | 
|---|
| 510 |  | -	__flush_tlb_one_user(addr);  | 
|---|
| 511 |  | -  | 
|---|
| 512 |  | -	if (!static_cpu_has(X86_FEATURE_PTI))  | 
|---|
| 513 |  | -		return;  | 
|---|
| 514 |  | -  | 
|---|
| 515 |  | -	/*  | 
|---|
| 516 |  | -	 * See above.  We need to propagate the flush to all other address  | 
|---|
| 517 |  | -	 * spaces.  In principle, we only need to propagate it to kernelmode  | 
|---|
| 518 |  | -	 * address spaces, but the extra bookkeeping we would need is not  | 
|---|
| 519 |  | -	 * worth it.  | 
|---|
| 520 |  | -	 */  | 
|---|
| 521 |  | -	invalidate_other_asid();  | 
|---|
| 522 |  | -}  | 
|---|
| 523 |  | -  | 
|---|
| 524 |  | -#define TLB_FLUSH_ALL	-1UL  | 
|---|
| 525 | 169 |   | 
|---|
| 526 | 170 |  /* | 
|---|
| 527 | 171 |   * TLB flushing: | 
|---|
| .. | .. | 
|---|
| 557 | 201 |  	unsigned long		start; | 
|---|
| 558 | 202 |  	unsigned long		end; | 
|---|
| 559 | 203 |  	u64			new_tlb_gen; | 
|---|
 | 204 | +	unsigned int		stride_shift;  | 
|---|
 | 205 | +	bool			freed_tables;  | 
|---|
| 560 | 206 |  }; | 
|---|
| 561 | 207 |   | 
|---|
| 562 |  | -#define local_flush_tlb() __flush_tlb()  | 
|---|
 | 208 | +void flush_tlb_local(void);  | 
|---|
 | 209 | +void flush_tlb_one_user(unsigned long addr);  | 
|---|
 | 210 | +void flush_tlb_one_kernel(unsigned long addr);  | 
|---|
 | 211 | +void flush_tlb_others(const struct cpumask *cpumask,  | 
|---|
 | 212 | +		      const struct flush_tlb_info *info);  | 
|---|
| 563 | 213 |   | 
|---|
| 564 |  | -#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)  | 
|---|
 | 214 | +#ifdef CONFIG_PARAVIRT  | 
|---|
 | 215 | +#include <asm/paravirt.h>  | 
|---|
 | 216 | +#endif  | 
|---|
| 565 | 217 |   | 
|---|
| 566 |  | -#define flush_tlb_range(vma, start, end)	\  | 
|---|
| 567 |  | -		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)  | 
|---|
 | 218 | +#define flush_tlb_mm(mm)						\  | 
|---|
 | 219 | +		flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)  | 
|---|
 | 220 | +  | 
|---|
 | 221 | +#define flush_tlb_range(vma, start, end)				\  | 
|---|
 | 222 | +	flush_tlb_mm_range((vma)->vm_mm, start, end,			\  | 
|---|
 | 223 | +			   ((vma)->vm_flags & VM_HUGETLB)		\  | 
|---|
 | 224 | +				? huge_page_shift(hstate_vma(vma))	\  | 
|---|
 | 225 | +				: PAGE_SHIFT, false)  | 
|---|
| 568 | 226 |   | 
|---|
| 569 | 227 |  extern void flush_tlb_all(void); | 
|---|
| 570 | 228 |  extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | 
|---|
| 571 |  | -				unsigned long end, unsigned long vmflag);  | 
|---|
 | 229 | +				unsigned long end, unsigned int stride_shift,  | 
|---|
 | 230 | +				bool freed_tables);  | 
|---|
| 572 | 231 |  extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); | 
|---|
| 573 | 232 |   | 
|---|
| 574 | 233 |  static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) | 
|---|
| 575 | 234 |  { | 
|---|
| 576 |  | -	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);  | 
|---|
 | 235 | +	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);  | 
|---|
| 577 | 236 |  } | 
|---|
| 578 |  | -  | 
|---|
| 579 |  | -void native_flush_tlb_others(const struct cpumask *cpumask,  | 
|---|
| 580 |  | -			     const struct flush_tlb_info *info);  | 
|---|
| 581 | 237 |   | 
|---|
| 582 | 238 |  static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) | 
|---|
| 583 | 239 |  { | 
|---|
| .. | .. | 
|---|
| 599 | 255 |   | 
|---|
| 600 | 256 |  extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); | 
|---|
| 601 | 257 |   | 
|---|
| 602 |  | -#ifndef CONFIG_PARAVIRT  | 
|---|
| 603 |  | -#define flush_tlb_others(mask, info)	\  | 
|---|
| 604 |  | -	native_flush_tlb_others(mask, info)  | 
|---|
| 605 |  | -  | 
|---|
| 606 |  | -#define paravirt_tlb_remove_table(tlb, page) \  | 
|---|
| 607 |  | -	tlb_remove_page(tlb, (void *)(page))  | 
|---|
| 608 |  | -#endif  | 
|---|
 | 258 | +#endif /* !MODULE */  | 
|---|
| 609 | 259 |   | 
|---|
| 610 | 260 |  #endif /* _ASM_X86_TLBFLUSH_H */ | 
|---|