hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/boot/compressed/head_64.S
....@@ -33,6 +33,7 @@
3333 #include <asm/processor-flags.h>
3434 #include <asm/asm-offsets.h>
3535 #include <asm/bootparam.h>
36
+#include <asm/desc_defs.h>
3637 #include "pgtable.h"
3738
3839 /*
....@@ -40,13 +41,37 @@
4041 */
4142 .hidden _bss
4243 .hidden _ebss
43
- .hidden _got
44
- .hidden _egot
4544 .hidden _end
4645
4746 __HEAD
47
+
48
+/*
49
+ * This macro gives the relative virtual address of X, i.e. the offset of X
50
+ * from startup_32. This is the same as the link-time virtual address of X,
51
+ * since startup_32 is at 0, but defining it this way tells the
52
+ * assembler/linker that we do not want the actual run-time address of X. This
53
+ * prevents the linker from trying to create unwanted run-time relocation
54
+ * entries for the reference when the compressed kernel is linked as PIE.
55
+ *
56
+ * A reference X(%reg) will result in the link-time VA of X being stored with
57
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
58
+ * adds the 64-bit base address where the kernel is loaded.
59
+ *
60
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
61
+ * and no run-time relocation.
62
+ *
63
+ * The macro should be used as a displacement with a base register containing
64
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
65
+ * [$ rva(X)].
66
+ *
67
+ * This macro can only be used from within the .head.text section, since the
68
+ * expression requires startup_32 to be in the same section as the code being
69
+ * assembled.
70
+ */
71
+#define rva(X) ((X) - startup_32)
72
+
4873 .code32
49
-ENTRY(startup_32)
74
+SYM_FUNC_START(startup_32)
5075 /*
5176 * 32bit entry is 0 and it is ABI so immutable!
5277 * If we come here directly from a bootloader,
....@@ -54,19 +79,7 @@
5479 * all need to be under the 4G limit.
5580 */
5681 cld
57
- /*
58
- * Test KEEP_SEGMENTS flag to see if the bootloader is asking
59
- * us to not reload segments
60
- */
61
- testb $KEEP_SEGMENTS, BP_loadflags(%esi)
62
- jnz 1f
63
-
6482 cli
65
- movl $(__BOOT_DS), %eax
66
- movl %eax, %ds
67
- movl %eax, %es
68
- movl %eax, %ss
69
-1:
7083
7184 /*
7285 * Calculate the delta between where we were compiled to run
....@@ -79,16 +92,27 @@
7992 leal (BP_scratch+4)(%esi), %esp
8093 call 1f
8194 1: popl %ebp
82
- subl $1b, %ebp
95
+ subl $ rva(1b), %ebp
96
+
97
+ /* Load new GDT with the 64bit segments using 32bit descriptor */
98
+ leal rva(gdt)(%ebp), %eax
99
+ movl %eax, 2(%eax)
100
+ lgdt (%eax)
101
+
102
+ /* Load segment registers with our descriptors */
103
+ movl $__BOOT_DS, %eax
104
+ movl %eax, %ds
105
+ movl %eax, %es
106
+ movl %eax, %fs
107
+ movl %eax, %gs
108
+ movl %eax, %ss
83109
84110 /* setup a stack and make sure cpu supports long mode. */
85
- movl $boot_stack_end, %eax
86
- addl %ebp, %eax
87
- movl %eax, %esp
111
+ leal rva(boot_stack_end)(%ebp), %esp
88112
89113 call verify_cpu
90114 testl %eax, %eax
91
- jnz no_longmode
115
+ jnz .Lno_longmode
92116
93117 /*
94118 * Compute the delta between where we were compiled to run at
....@@ -101,6 +125,19 @@
101125
102126 #ifdef CONFIG_RELOCATABLE
103127 movl %ebp, %ebx
128
+
129
+#ifdef CONFIG_EFI_STUB
130
+/*
131
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
132
+ * offset to the start of the space allocated for the image. efi_pe_entry will
133
+ * set up image_offset to tell us where the image actually starts, so that we
134
+ * can use the full available buffer.
135
+ * image_offset = startup_32 - image_base
136
+ * Otherwise image_offset will be zero and has no effect on the calculations.
137
+ */
138
+ subl rva(image_offset)(%ebp), %ebx
139
+#endif
140
+
104141 movl BP_kernel_alignment(%esi), %eax
105142 decl %eax
106143 addl %eax, %ebx
....@@ -113,17 +150,12 @@
113150 1:
114151
115152 /* Target address to relocate to for decompression */
116
- movl BP_init_size(%esi), %eax
117
- subl $_end, %eax
118
- addl %eax, %ebx
153
+ addl BP_init_size(%esi), %ebx
154
+ subl $ rva(_end), %ebx
119155
120156 /*
121157 * Prepare for entering 64 bit mode
122158 */
123
-
124
- /* Load new GDT with the 64bit segments using 32bit descriptor */
125
- addl %ebp, gdt+2(%ebp)
126
- lgdt gdt(%ebp)
127159
128160 /* Enable PAE mode */
129161 movl %cr4, %eax
....@@ -140,26 +172,36 @@
140172 */
141173 call get_sev_encryption_bit
142174 xorl %edx, %edx
175
+#ifdef CONFIG_AMD_MEM_ENCRYPT
143176 testl %eax, %eax
144177 jz 1f
145178 subl $32, %eax /* Encryption bit is always above bit 31 */
146179 bts %eax, %edx /* Set encryption mask for page tables */
180
+ /*
181
+ * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
182
+ * will do a check. The sev_status memory will be fully initialized
183
+ * with the contents of MSR_AMD_SEV_STATUS later in
184
+ * set_sev_encryption_mask(). For now it is sufficient to know that SEV
185
+ * is active.
186
+ */
187
+ movl $1, rva(sev_status)(%ebp)
147188 1:
189
+#endif
148190
149191 /* Initialize Page tables to 0 */
150
- leal pgtable(%ebx), %edi
192
+ leal rva(pgtable)(%ebx), %edi
151193 xorl %eax, %eax
152194 movl $(BOOT_INIT_PGT_SIZE/4), %ecx
153195 rep stosl
154196
155197 /* Build Level 4 */
156
- leal pgtable + 0(%ebx), %edi
198
+ leal rva(pgtable + 0)(%ebx), %edi
157199 leal 0x1007 (%edi), %eax
158200 movl %eax, 0(%edi)
159201 addl %edx, 4(%edi)
160202
161203 /* Build Level 3 */
162
- leal pgtable + 0x1000(%ebx), %edi
204
+ leal rva(pgtable + 0x1000)(%ebx), %edi
163205 leal 0x1007(%edi), %eax
164206 movl $4, %ecx
165207 1: movl %eax, 0x00(%edi)
....@@ -170,7 +212,7 @@
170212 jnz 1b
171213
172214 /* Build Level 2 */
173
- leal pgtable + 0x2000(%ebx), %edi
215
+ leal rva(pgtable + 0x2000)(%ebx), %edi
174216 movl $0x00000183, %eax
175217 movl $2048, %ecx
176218 1: movl %eax, 0(%edi)
....@@ -181,7 +223,7 @@
181223 jnz 1b
182224
183225 /* Enable the boot page tables */
184
- leal pgtable(%ebx), %eax
226
+ leal rva(pgtable)(%ebx), %eax
185227 movl %eax, %cr3
186228
187229 /* Enable Long mode in EFER (Extended Feature Enable Register) */
....@@ -206,15 +248,33 @@
206248 * We place all of the values on our mini stack so lret can
207249 * used to perform that far jump.
208250 */
209
- pushl $__KERNEL_CS
210
- leal startup_64(%ebp), %eax
251
+ leal rva(startup_64)(%ebp), %eax
211252 #ifdef CONFIG_EFI_MIXED
212
- movl efi32_config(%ebp), %ebx
213
- cmp $0, %ebx
253
+ movl rva(efi32_boot_args)(%ebp), %edi
254
+ cmp $0, %edi
214255 jz 1f
215
- leal handover_entry(%ebp), %eax
256
+ leal rva(efi64_stub_entry)(%ebp), %eax
257
+ movl rva(efi32_boot_args+4)(%ebp), %esi
258
+ movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
259
+ cmpl $0, %edx
260
+ jnz 1f
261
+ /*
262
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
263
+ * shadow space on the stack even if all arguments are passed in
264
+ * registers. We also need an additional 8 bytes for the space that
265
+ * would be occupied by the return address, and this also results in
266
+ * the correct stack alignment for entry.
267
+ */
268
+ subl $40, %esp
269
+ leal rva(efi_pe_entry)(%ebp), %eax
270
+ movl %edi, %ecx // MS calling convention
271
+ movl %esi, %edx
216272 1:
217273 #endif
274
+ /* Check if the C-bit position is correct when SEV is active */
275
+ call startup32_check_sev_cbit
276
+
277
+ pushl $__KERNEL_CS
218278 pushl %eax
219279
220280 /* Enter paged protected Mode, activating Long Mode */
....@@ -223,27 +283,30 @@
223283
224284 /* Jump from 32bit compatibility mode into 64bit mode. */
225285 lret
226
-ENDPROC(startup_32)
286
+SYM_FUNC_END(startup_32)
227287
228288 #ifdef CONFIG_EFI_MIXED
229289 .org 0x190
230
-ENTRY(efi32_stub_entry)
290
+SYM_FUNC_START(efi32_stub_entry)
231291 add $0x4, %esp /* Discard return address */
232292 popl %ecx
233293 popl %edx
234294 popl %esi
235295
236
- leal (BP_scratch+4)(%esi), %esp
237296 call 1f
238297 1: pop %ebp
239
- subl $1b, %ebp
298
+ subl $ rva(1b), %ebp
240299
241
- movl %ecx, efi32_config(%ebp)
242
- movl %edx, efi32_config+8(%ebp)
243
- sgdtl efi32_boot_gdt(%ebp)
300
+ movl %esi, rva(efi32_boot_args+8)(%ebp)
301
+SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
302
+ movl %ecx, rva(efi32_boot_args)(%ebp)
303
+ movl %edx, rva(efi32_boot_args+4)(%ebp)
304
+ movb $0, rva(efi_is64)(%ebp)
244305
245
- leal efi32_config(%ebp), %eax
246
- movl %eax, efi_config(%ebp)
306
+ /* Save firmware GDTR and code/data selectors */
307
+ sgdtl rva(efi32_boot_gdt)(%ebp)
308
+ movw %cs, rva(efi32_boot_cs)(%ebp)
309
+ movw %ds, rva(efi32_boot_ds)(%ebp)
247310
248311 /* Disable paging */
249312 movl %cr0, %eax
....@@ -251,12 +314,12 @@
251314 movl %eax, %cr0
252315
253316 jmp startup_32
254
-ENDPROC(efi32_stub_entry)
317
+SYM_FUNC_END(efi32_stub_entry)
255318 #endif
256319
257320 .code64
258321 .org 0x200
259
-ENTRY(startup_64)
322
+SYM_CODE_START(startup_64)
260323 /*
261324 * 64bit entry is 0x200 and it is ABI so immutable!
262325 * We come here either from startup_32 or directly from a
....@@ -267,6 +330,9 @@
267330 * that maps our entire kernel(text+data+bss+brk), zero page
268331 * and command line.
269332 */
333
+
334
+ cld
335
+ cli
270336
271337 /* Setup data segments. */
272338 xorl %eax, %eax
....@@ -292,6 +358,20 @@
292358 /* Start with the delta to where the kernel will run at. */
293359 #ifdef CONFIG_RELOCATABLE
294360 leaq startup_32(%rip) /* - $startup_32 */, %rbp
361
+
362
+#ifdef CONFIG_EFI_STUB
363
+/*
364
+ * If we were loaded via the EFI LoadImage service, startup_32 will be at an
365
+ * offset to the start of the space allocated for the image. efi_pe_entry will
366
+ * set up image_offset to tell us where the image actually starts, so that we
367
+ * can use the full available buffer.
368
+ * image_offset = startup_32 - image_base
369
+ * Otherwise image_offset will be zero and has no effect on the calculations.
370
+ */
371
+ movl image_offset(%rip), %eax
372
+ subq %rax, %rbp
373
+#endif
374
+
295375 movl BP_kernel_alignment(%rsi), %eax
296376 decl %eax
297377 addq %rax, %rbp
....@@ -305,30 +385,11 @@
305385
306386 /* Target address to relocate to for decompression */
307387 movl BP_init_size(%rsi), %ebx
308
- subl $_end, %ebx
388
+ subl $ rva(_end), %ebx
309389 addq %rbp, %rbx
310390
311391 /* Set up the stack */
312
- leaq boot_stack_end(%rbx), %rsp
313
-
314
- /*
315
- * paging_prepare() and cleanup_trampoline() below can have GOT
316
- * references. Adjust the table with address we are running at.
317
- *
318
- * Zero RAX for adjust_got: the GOT was not adjusted before;
319
- * there's no adjustment to undo.
320
- */
321
- xorq %rax, %rax
322
-
323
- /*
324
- * Calculate the address the binary is loaded at and use it as
325
- * a GOT adjustment.
326
- */
327
- call 1f
328
-1: popq %rdi
329
- subq $1b, %rdi
330
-
331
- call adjust_got
392
+ leaq rva(boot_stack_end)(%rbx), %rsp
332393
333394 /*
334395 * At this point we are in long mode with 4-level paging enabled,
....@@ -356,16 +417,31 @@
356417 */
357418
358419 /* Make sure we have GDT with 32-bit code segment */
359
- leaq gdt(%rip), %rax
360
- movq %rax, gdt64+2(%rip)
361
- lgdt gdt64(%rip)
420
+ leaq gdt64(%rip), %rax
421
+ addq %rax, 2(%rax)
422
+ lgdt (%rax)
423
+
424
+ /* Reload CS so IRET returns to a CS actually in the GDT */
425
+ pushq $__KERNEL_CS
426
+ leaq .Lon_kernel_cs(%rip), %rax
427
+ pushq %rax
428
+ lretq
429
+
430
+.Lon_kernel_cs:
431
+
432
+ pushq %rsi
433
+ call load_stage1_idt
434
+ popq %rsi
362435
363436 /*
364437 * paging_prepare() sets up the trampoline and checks if we need to
365438 * enable 5-level paging.
366439 *
367
- * Address of the trampoline is returned in RAX.
368
- * Non zero RDX on return means we need to enable 5-level paging.
440
+ * paging_prepare() returns a two-quadword structure which lands
441
+ * into RDX:RAX:
442
+ * - Address of the trampoline is returned in RAX.
443
+ * - Non zero RDX means trampoline needs to enable 5-level
444
+ * paging.
369445 *
370446 * RSI holds real mode data and needs to be preserved across
371447 * this function call.
....@@ -378,11 +454,25 @@
378454 /* Save the trampoline address in RCX */
379455 movq %rax, %rcx
380456
457
+ /* Set up 32-bit addressable stack */
458
+ leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
459
+
381460 /*
382
- * Load the address of trampoline_return() into RDI.
383
- * It will be used by the trampoline to return to the main code.
461
+ * Preserve live 64-bit registers on the stack: this is necessary
462
+ * because the architecture does not guarantee that GPRs will retain
463
+ * their full 64-bit values across a 32-bit mode switch.
464
+ */
465
+ pushq %rbp
466
+ pushq %rbx
467
+ pushq %rsi
468
+
469
+ /*
470
+ * Push the 64-bit address of trampoline_return() onto the new stack.
471
+ * It will be used by the trampoline to return to the main code. Due to
472
+ * the 32-bit mode switch, it cannot be kept it in a register either.
384473 */
385474 leaq trampoline_return(%rip), %rdi
475
+ pushq %rdi
386476
387477 /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
388478 pushq $__KERNEL32_CS
....@@ -390,8 +480,13 @@
390480 pushq %rax
391481 lretq
392482 trampoline_return:
483
+ /* Restore live 64-bit registers */
484
+ popq %rsi
485
+ popq %rbx
486
+ popq %rbp
487
+
393488 /* Restore the stack, the 32-bit trampoline uses its own stack */
394
- leaq boot_stack_end(%rbx), %rsp
489
+ leaq rva(boot_stack_end)(%rbx), %rsp
395490
396491 /*
397492 * cleanup_trampoline() would restore trampoline memory.
....@@ -403,7 +498,7 @@
403498 * this function call.
404499 */
405500 pushq %rsi
406
- leaq top_pgtable(%rbx), %rdi
501
+ leaq rva(top_pgtable)(%rbx), %rdi
407502 call cleanup_trampoline
408503 popq %rsi
409504
....@@ -411,110 +506,53 @@
411506 pushq $0
412507 popfq
413508
414
- /*
415
- * Previously we've adjusted the GOT with address the binary was
416
- * loaded at. Now we need to re-adjust for relocation address.
417
- *
418
- * Calculate the address the binary is loaded at, so that we can
419
- * undo the previous GOT adjustment.
420
- */
421
- call 1f
422
-1: popq %rax
423
- subq $1b, %rax
424
-
425
- /* The new adjustment is the relocation address */
426
- movq %rbx, %rdi
427
- call adjust_got
428
-
429509 /*
430510 * Copy the compressed kernel to the end of our buffer
431511 * where decompression in place becomes safe.
432512 */
433513 pushq %rsi
434514 leaq (_bss-8)(%rip), %rsi
435
- leaq (_bss-8)(%rbx), %rdi
436
- movq $_bss /* - $startup_32 */, %rcx
437
- shrq $3, %rcx
515
+ leaq rva(_bss-8)(%rbx), %rdi
516
+ movl $(_bss - startup_32), %ecx
517
+ shrl $3, %ecx
438518 std
439519 rep movsq
440520 cld
441521 popq %rsi
442522
523
+ /*
524
+ * The GDT may get overwritten either during the copy we just did or
525
+ * during extract_kernel below. To avoid any issues, repoint the GDTR
526
+ * to the new copy of the GDT.
527
+ */
528
+ leaq rva(gdt64)(%rbx), %rax
529
+ leaq rva(gdt)(%rbx), %rdx
530
+ movq %rdx, 2(%rax)
531
+ lgdt (%rax)
532
+
443533 /*
444534 * Jump to the relocated address.
445535 */
446
- leaq relocated(%rbx), %rax
536
+ leaq rva(.Lrelocated)(%rbx), %rax
447537 jmp *%rax
538
+SYM_CODE_END(startup_64)
448539
449540 #ifdef CONFIG_EFI_STUB
450
-
451
-/* The entry point for the PE/COFF executable is efi_pe_entry. */
452
-ENTRY(efi_pe_entry)
453
- movq %rcx, efi64_config(%rip) /* Handle */
454
- movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
455
-
456
- leaq efi64_config(%rip), %rax
457
- movq %rax, efi_config(%rip)
458
-
459
- call 1f
460
-1: popq %rbp
461
- subq $1b, %rbp
462
-
463
- /*
464
- * Relocate efi_config->call().
465
- */
466
- addq %rbp, efi64_config+40(%rip)
467
-
468
- movq %rax, %rdi
469
- call make_boot_params
470
- cmpq $0,%rax
471
- je fail
472
- mov %rax, %rsi
473
- leaq startup_32(%rip), %rax
474
- movl %eax, BP_code32_start(%rsi)
475
- jmp 2f /* Skip the relocation */
476
-
477
-handover_entry:
478
- call 1f
479
-1: popq %rbp
480
- subq $1b, %rbp
481
-
482
- /*
483
- * Relocate efi_config->call().
484
- */
485
- movq efi_config(%rip), %rax
486
- addq %rbp, 40(%rax)
487
-2:
488
- movq efi_config(%rip), %rdi
489
- call efi_main
490
- movq %rax,%rsi
491
- cmpq $0,%rax
492
- jne 2f
493
-fail:
494
- /* EFI init failed, so hang. */
495
- hlt
496
- jmp fail
497
-2:
498
- movl BP_code32_start(%esi), %eax
499
- leaq startup_64(%rax), %rax
500
- jmp *%rax
501
-ENDPROC(efi_pe_entry)
502
-
503541 .org 0x390
504
-ENTRY(efi64_stub_entry)
505
- movq %rdi, efi64_config(%rip) /* Handle */
506
- movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
507
-
508
- leaq efi64_config(%rip), %rax
509
- movq %rax, efi_config(%rip)
510
-
511
- movq %rdx, %rsi
512
- jmp handover_entry
513
-ENDPROC(efi64_stub_entry)
542
+SYM_FUNC_START(efi64_stub_entry)
543
+SYM_FUNC_START_ALIAS(efi_stub_entry)
544
+ and $~0xf, %rsp /* realign the stack */
545
+ movq %rdx, %rbx /* save boot_params pointer */
546
+ call efi_main
547
+ movq %rbx,%rsi
548
+ leaq rva(startup_64)(%rax), %rax
549
+ jmp *%rax
550
+SYM_FUNC_END(efi64_stub_entry)
551
+SYM_FUNC_END_ALIAS(efi_stub_entry)
514552 #endif
515553
516554 .text
517
-relocated:
555
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
518556
519557 /*
520558 * Clear BSS (stack is currently empty)
....@@ -527,15 +565,33 @@
527565 rep stosq
528566
529567 /*
568
+ * If running as an SEV guest, the encryption mask is required in the
569
+ * page-table setup code below. When the guest also has SEV-ES enabled
570
+ * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
571
+ * handler can't map its GHCB because the page-table is not set up yet.
572
+ * So set up the encryption mask here while still on the stage1 #VC
573
+ * handler. Then load stage2 IDT and switch to the kernel's own
574
+ * page-table.
575
+ */
576
+ pushq %rsi
577
+ call set_sev_encryption_mask
578
+ call load_stage2_idt
579
+
580
+ /* Pass boot_params to initialize_identity_maps() */
581
+ movq (%rsp), %rdi
582
+ call initialize_identity_maps
583
+ popq %rsi
584
+
585
+/*
530586 * Do the extraction, and jump to the new kernel..
531587 */
532588 pushq %rsi /* Save the real mode argument */
533589 movq %rsi, %rdi /* real mode address */
534590 leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
535591 leaq input_data(%rip), %rdx /* input_data */
536
- movl $z_input_len, %ecx /* input_len */
592
+ movl input_len(%rip), %ecx /* input_len */
537593 movq %rbp, %r8 /* output target address */
538
- movq $z_output_len, %r9 /* decompressed length, end of relocs */
594
+ movl output_len(%rip), %r9d /* decompressed length, end of relocs */
539595 call extract_kernel /* returns kernel location in %rax */
540596 popq %rsi
541597
....@@ -543,44 +599,21 @@
543599 * Jump to the decompressed kernel.
544600 */
545601 jmp *%rax
546
-
547
-/*
548
- * Adjust the global offset table
549
- *
550
- * RAX is the previous adjustment of the table to undo (use 0 if it's the
551
- * first time we touch GOT).
552
- * RDI is the new adjustment to apply.
553
- */
554
-adjust_got:
555
- /* Walk through the GOT adding the address to the entries */
556
- leaq _got(%rip), %rdx
557
- leaq _egot(%rip), %rcx
558
-1:
559
- cmpq %rcx, %rdx
560
- jae 2f
561
- subq %rax, (%rdx) /* Undo previous adjustment */
562
- addq %rdi, (%rdx) /* Apply the new adjustment */
563
- addq $8, %rdx
564
- jmp 1b
565
-2:
566
- ret
602
+SYM_FUNC_END(.Lrelocated)
567603
568604 .code32
569605 /*
570606 * This is the 32-bit trampoline that will be copied over to low memory.
571607 *
572
- * RDI contains the return address (might be above 4G).
608
+ * Return address is at the top of the stack (might be above 4G).
573609 * ECX contains the base address of the trampoline memory.
574
- * Non zero RDX on return means we need to enable 5-level paging.
610
+ * Non zero RDX means trampoline needs to enable 5-level paging.
575611 */
576
-ENTRY(trampoline_32bit_src)
612
+SYM_CODE_START(trampoline_32bit_src)
577613 /* Set up data and stack segments */
578614 movl $__KERNEL_DS, %eax
579615 movl %eax, %ds
580616 movl %eax, %ss
581
-
582
- /* Set up new stack */
583
- leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
584617
585618 /* Disable paging */
586619 movl %cr0, %eax
....@@ -625,7 +658,7 @@
625658 movl %eax, %cr4
626659
627660 /* Calculate address of paging_enabled() once we are executing in the trampoline */
628
- leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
661
+ leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
629662
630663 /* Prepare the stack for far return to Long Mode */
631664 pushl $__KERNEL_CS
....@@ -636,11 +669,13 @@
636669 movl %eax, %cr0
637670
638671 lret
672
+SYM_CODE_END(trampoline_32bit_src)
639673
640674 .code64
641
-paging_enabled:
675
+SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
642676 /* Return from the trampoline */
643
- jmp *%rdi
677
+ retq
678
+SYM_FUNC_END(.Lpaging_enabled)
644679
645680 /*
646681 * The trampoline code has a size limit.
....@@ -650,72 +685,229 @@
650685 .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
651686
652687 .code32
653
-no_longmode:
688
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
654689 /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
655690 1:
656691 hlt
657692 jmp 1b
693
+SYM_FUNC_END(.Lno_longmode)
658694
659695 #include "../../kernel/verify_cpu.S"
660696
661697 .data
662
-gdt64:
663
- .word gdt_end - gdt
698
+SYM_DATA_START_LOCAL(gdt64)
699
+ .word gdt_end - gdt - 1
700
+ .quad gdt - gdt64
701
+SYM_DATA_END(gdt64)
702
+ .balign 8
703
+SYM_DATA_START_LOCAL(gdt)
704
+ .word gdt_end - gdt - 1
664705 .long 0
665
- .word 0
666
- .quad 0
667
-gdt:
668
- .word gdt_end - gdt
669
- .long gdt
670706 .word 0
671707 .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
672708 .quad 0x00af9a000000ffff /* __KERNEL_CS */
673709 .quad 0x00cf92000000ffff /* __KERNEL_DS */
674710 .quad 0x0080890000000000 /* TS descriptor */
675711 .quad 0x0000000000000000 /* TS continued */
676
-gdt_end:
712
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
713
+
714
+SYM_DATA_START(boot_idt_desc)
715
+ .word boot_idt_end - boot_idt - 1
716
+ .quad 0
717
+SYM_DATA_END(boot_idt_desc)
718
+ .balign 8
719
+SYM_DATA_START(boot_idt)
720
+ .rept BOOT_IDT_ENTRIES
721
+ .quad 0
722
+ .quad 0
723
+ .endr
724
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
677725
678726 #ifdef CONFIG_EFI_STUB
679
-efi_config:
680
- .quad 0
681
-
727
+SYM_DATA(image_offset, .long 0)
728
+#endif
682729 #ifdef CONFIG_EFI_MIXED
683
- .global efi32_config
684
-efi32_config:
685
- .fill 5,8,0
686
- .quad efi64_thunk
687
- .byte 0
730
+SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
731
+SYM_DATA(efi_is64, .byte 1)
732
+
733
+#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
734
+#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
735
+#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
736
+
737
+ __HEAD
738
+ .code32
739
+SYM_FUNC_START(efi32_pe_entry)
740
+/*
741
+ * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
742
+ * efi_system_table_32_t *sys_table)
743
+ */
744
+
745
+ pushl %ebp
746
+ movl %esp, %ebp
747
+ pushl %eax // dummy push to allocate loaded_image
748
+
749
+ pushl %ebx // save callee-save registers
750
+ pushl %edi
751
+
752
+ call verify_cpu // check for long mode support
753
+ testl %eax, %eax
754
+ movl $0x80000003, %eax // EFI_UNSUPPORTED
755
+ jnz 2f
756
+
757
+ call 1f
758
+1: pop %ebx
759
+ subl $ rva(1b), %ebx
760
+
761
+ /* Get the loaded image protocol pointer from the image handle */
762
+ leal -4(%ebp), %eax
763
+ pushl %eax // &loaded_image
764
+ leal rva(loaded_image_proto)(%ebx), %eax
765
+ pushl %eax // pass the GUID address
766
+ pushl 8(%ebp) // pass the image handle
767
+
768
+ /*
769
+ * Note the alignment of the stack frame.
770
+ * sys_table
771
+ * handle <-- 16-byte aligned on entry by ABI
772
+ * return address
773
+ * frame pointer
774
+ * loaded_image <-- local variable
775
+ * saved %ebx <-- 16-byte aligned here
776
+ * saved %edi
777
+ * &loaded_image
778
+ * &loaded_image_proto
779
+ * handle <-- 16-byte aligned for call to handle_protocol
780
+ */
781
+
782
+ movl 12(%ebp), %eax // sys_table
783
+ movl ST32_boottime(%eax), %eax // sys_table->boottime
784
+ call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
785
+ addl $12, %esp // restore argument space
786
+ testl %eax, %eax
787
+ jnz 2f
788
+
789
+ movl 8(%ebp), %ecx // image_handle
790
+ movl 12(%ebp), %edx // sys_table
791
+ movl -4(%ebp), %esi // loaded_image
792
+ movl LI32_image_base(%esi), %esi // loaded_image->image_base
793
+ movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry
794
+ /*
795
+ * We need to set the image_offset variable here since startup_32() will
796
+ * use it before we get to the 64-bit efi_pe_entry() in C code.
797
+ */
798
+ subl %esi, %ebx
799
+ movl %ebx, rva(image_offset)(%ebp) // save image_offset
800
+ jmp efi32_pe_stub_entry
801
+
802
+2: popl %edi // restore callee-save registers
803
+ popl %ebx
804
+ leave
805
+ RET
806
+SYM_FUNC_END(efi32_pe_entry)
807
+
808
+ .section ".rodata"
809
+ /* EFI loaded image protocol GUID */
810
+ .balign 4
811
+SYM_DATA_START_LOCAL(loaded_image_proto)
812
+ .long 0x5b1b31a1
813
+ .word 0x9562, 0x11d2
814
+ .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
815
+SYM_DATA_END(loaded_image_proto)
688816 #endif
689817
690
- .global efi64_config
691
-efi64_config:
692
- .fill 5,8,0
693
- .quad efi_call
694
- .byte 1
695
-#endif /* CONFIG_EFI_STUB */
818
+/*
819
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
820
+ *
821
+ * The check makes use of the fact that all memory is encrypted when paging is
822
+ * disabled. The function creates 64 bits of random data using the RDRAND
823
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
824
+ * hypervisor violates that the kernel will crash right here.
825
+ *
826
+ * The 64 bits of random data are stored to a memory location and at the same
827
+ * time kept in the %eax and %ebx registers. Since encryption is always active
828
+ * when paging is off the random data will be stored encrypted in main memory.
829
+ *
830
+ * Then paging is enabled. When the C-bit position is correct all memory is
831
+ * still mapped encrypted and comparing the register values with memory will
832
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
833
+ * the compare will use the encrypted random data and fail.
834
+ */
835
+ __HEAD
836
+ .code32
837
+SYM_FUNC_START(startup32_check_sev_cbit)
838
+#ifdef CONFIG_AMD_MEM_ENCRYPT
839
+ pushl %eax
840
+ pushl %ebx
841
+ pushl %ecx
842
+ pushl %edx
843
+
844
+ /* Check for non-zero sev_status */
845
+ movl rva(sev_status)(%ebp), %eax
846
+ testl %eax, %eax
847
+ jz 4f
848
+
849
+ /*
850
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
851
+ * because it is better to prevent forward progress if no random value
852
+ * can be gathered.
853
+ */
854
+1: rdrand %eax
855
+ jnc 1b
856
+2: rdrand %ebx
857
+ jnc 2b
858
+
859
+ /* Store to memory and keep it in the registers */
860
+ movl %eax, rva(sev_check_data)(%ebp)
861
+ movl %ebx, rva(sev_check_data+4)(%ebp)
862
+
863
+ /* Enable paging to see if encryption is active */
864
+ movl %cr0, %edx /* Backup %cr0 in %edx */
865
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
866
+ movl %ecx, %cr0
867
+
868
+ cmpl %eax, rva(sev_check_data)(%ebp)
869
+ jne 3f
870
+ cmpl %ebx, rva(sev_check_data+4)(%ebp)
871
+ jne 3f
872
+
873
+ movl %edx, %cr0 /* Restore previous %cr0 */
874
+
875
+ jmp 4f
876
+
877
+3: /* Check failed - hlt the machine */
878
+ hlt
879
+ jmp 3b
880
+
881
+4:
882
+ popl %edx
883
+ popl %ecx
884
+ popl %ebx
885
+ popl %eax
886
+#endif
887
+ RET
888
+SYM_FUNC_END(startup32_check_sev_cbit)
696889
697890 /*
698891 * Stack and heap for uncompression
699892 */
700893 .bss
701894 .balign 4
702
-boot_heap:
703
- .fill BOOT_HEAP_SIZE, 1, 0
704
-boot_stack:
895
+SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
896
+
897
+SYM_DATA_START_LOCAL(boot_stack)
705898 .fill BOOT_STACK_SIZE, 1, 0
706
-boot_stack_end:
899
+ .balign 16
900
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
707901
708902 /*
709903 * Space for page tables (not in .bss so not zeroed)
710904 */
711
- .section ".pgtable","a",@nobits
905
+ .section ".pgtable","aw",@nobits
712906 .balign 4096
713
-pgtable:
714
- .fill BOOT_PGT_SIZE, 1, 0
907
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
715908
716909 /*
717910 * The page table is going to be used instead of page table in the trampoline
718911 * memory.
719912 */
720
-top_pgtable:
721
- .fill PAGE_SIZE, 1, 0
913
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)