x86: implement kernel page table isolation
Implement a set of per-cpu trampoline stacks which all interrupts and exceptions will initially land on, and also as an intermediate stack for privilege changes as we need some stack space to swap page tables. Set up the special trampoline page which contains all the trampoline stacks, TSS, and GDT. This page needs to be present in the user page tables or interrupts don't work. CPU exceptions, with KPTI turned on, are treated as interrupts and not traps so that we have IRQs locked on exception entry. Add some additional macros for defining IDT entries. Add special handling of locore text/rodata sections when creating user mode page tables on x86-64. Restore qemu_x86_64 to use KPTI, and remove restrictions on enabling user mode on x86-64. Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
parent
cc45266fdc
commit
e34f1cee06
|
@ -17,14 +17,14 @@ config CPU_ATOM
|
|||
bool
|
||||
select CPU_HAS_FPU
|
||||
select ARCH_HAS_STACK_PROTECTION if X86_MMU
|
||||
select ARCH_HAS_USERSPACE if X86_MMU && (!X86_64 || (X86_64 && X86_NO_MELTDOWN))
|
||||
select ARCH_HAS_USERSPACE if X86_MMU
|
||||
help
|
||||
This option signifies the use of a CPU from the Atom family.
|
||||
|
||||
config CPU_MINUTEIA
|
||||
bool
|
||||
select ARCH_HAS_STACK_PROTECTION if X86_MMU
|
||||
select ARCH_HAS_USERSPACE if X86_MMU && (!X86_64 || (X86_64 && X86_NO_MELTDOWN))
|
||||
select ARCH_HAS_USERSPACE if X86_MMU
|
||||
help
|
||||
This option signifies the use of a CPU from the Minute IA family.
|
||||
|
||||
|
@ -32,7 +32,7 @@ config CPU_APOLLO_LAKE
|
|||
bool
|
||||
select CPU_HAS_FPU
|
||||
select ARCH_HAS_STACK_PROTECTION if X86_MMU
|
||||
select ARCH_HAS_USERSPACE if X86_MMU && (!X86_64 || (X86_64 && X86_NO_MELTDOWN))
|
||||
select ARCH_HAS_USERSPACE if X86_MMU
|
||||
help
|
||||
This option signifies the use of a CPU from the Apollo Lake family.
|
||||
|
||||
|
@ -270,7 +270,6 @@ config X86_KPTI
|
|||
default y
|
||||
depends on USERSPACE
|
||||
depends on !X86_NO_MELTDOWN
|
||||
depends on !X86_64
|
||||
help
|
||||
Implements kernel page table isolation to mitigate Meltdown exploits
|
||||
to read Kernel RAM. Incurs a significant performance cost for
|
||||
|
|
|
@ -26,8 +26,18 @@ extern u8_t _exception_stack1[];
|
|||
extern u8_t _exception_stack2[];
|
||||
extern u8_t _exception_stack3[];
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
extern u8_t z_x86_trampoline_stack[];
|
||||
extern u8_t z_x86_trampoline_stack1[];
|
||||
extern u8_t z_x86_trampoline_stack2[];
|
||||
extern u8_t z_x86_trampoline_stack3[];
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
Z_GENERIC_SECTION(.tss)
|
||||
struct x86_tss64 tss0 = {
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
.ist2 = (u64_t) z_x86_trampoline_stack + Z_X86_TRAMPOLINE_STACK_SIZE,
|
||||
#endif
|
||||
.ist7 = (u64_t) _exception_stack + CONFIG_EXCEPTION_STACK_SIZE,
|
||||
.iomapb = 0xFFFF,
|
||||
.cpu = &(_kernel.cpus[0])
|
||||
|
@ -36,6 +46,9 @@ struct x86_tss64 tss0 = {
|
|||
#if CONFIG_MP_NUM_CPUS > 1
|
||||
Z_GENERIC_SECTION(.tss)
|
||||
struct x86_tss64 tss1 = {
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
.ist2 = (u64_t) z_x86_trampoline_stack1 + Z_X86_TRAMPOLINE_STACK_SIZE,
|
||||
#endif
|
||||
.ist7 = (u64_t) _exception_stack1 + CONFIG_EXCEPTION_STACK_SIZE,
|
||||
.iomapb = 0xFFFF,
|
||||
.cpu = &(_kernel.cpus[1])
|
||||
|
@ -45,6 +58,9 @@ struct x86_tss64 tss1 = {
|
|||
#if CONFIG_MP_NUM_CPUS > 2
|
||||
Z_GENERIC_SECTION(.tss)
|
||||
struct x86_tss64 tss2 = {
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
.ist2 = (u64_t) z_x86_trampoline_stack2 + Z_X86_TRAMPOLINE_STACK_SIZE,
|
||||
#endif
|
||||
.ist7 = (u64_t) _exception_stack2 + CONFIG_EXCEPTION_STACK_SIZE,
|
||||
.iomapb = 0xFFFF,
|
||||
.cpu = &(_kernel.cpus[2])
|
||||
|
@ -54,6 +70,9 @@ struct x86_tss64 tss2 = {
|
|||
#if CONFIG_MP_NUM_CPUS > 3
|
||||
Z_GENERIC_SECTION(.tss)
|
||||
struct x86_tss64 tss3 = {
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
.ist2 = (u64_t) z_x86_trampoline_stack3 + Z_X86_TRAMPOLINE_STACK_SIZE,
|
||||
#endif
|
||||
.ist7 = (u64_t) _exception_stack3 + CONFIG_EXCEPTION_STACK_SIZE,
|
||||
.iomapb = 0xFFFF,
|
||||
.cpu = &(_kernel.cpus[3])
|
||||
|
|
|
@ -232,15 +232,21 @@ z_x86_switch:
|
|||
|
||||
__resume:
|
||||
#ifdef CONFIG_USERSPACE
|
||||
#ifndef CONFIG_X86_KPTI
|
||||
/* If KPTI is enabled we're always on the kernel's page tables in
|
||||
* this context and the appropriate page table switch takes place
|
||||
* when trampolining back to user mode
|
||||
*/
|
||||
pushq %rdi /* Caller-saved, stash it */
|
||||
call z_x86_swap_update_page_tables
|
||||
popq %rdi
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
/* Set up exception return stack frame */
|
||||
pushq _thread_offset_to_ss(%rdi) /* SS */
|
||||
#else
|
||||
pushq $X86_KERNEL_DS /* SS */
|
||||
#endif
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
pushq _thread_offset_to_rsp(%rdi) /* RSP */
|
||||
pushq _thread_offset_to_rflags(%rdi) /* RFLAGS */
|
||||
#ifdef CONFIG_USERSPACE
|
||||
|
@ -256,6 +262,13 @@ __resume:
|
|||
movq _thread_offset_to_r13(%rdi), %r13
|
||||
movq _thread_offset_to_r14(%rdi), %r14
|
||||
movq _thread_offset_to_r15(%rdi), %r15
|
||||
#ifdef CONFIG_USERSPACE
|
||||
/* Set correct privilege elevation stack to manually switch to in
|
||||
* z_x86_syscall_entry_stub()
|
||||
*/
|
||||
movq _thread_offset_to_psp(%rdi), %rax
|
||||
movq %rax, %gs:__x86_tss64_t_psp_OFFSET
|
||||
#endif
|
||||
|
||||
testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%rdi)
|
||||
jz 1f
|
||||
|
@ -275,7 +288,11 @@ __resume:
|
|||
/* Swap GS register values if we are returning to user mode */
|
||||
testb $0x3, 8(%rsp)
|
||||
jz 1f
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
jmp z_x86_trampoline_to_user
|
||||
#else
|
||||
swapgs
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
1:
|
||||
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
|
||||
|
@ -290,22 +307,66 @@ __resume:
|
|||
#define EXCEPT_CODE(nr) vector_ ## nr: pushq $nr; jmp except
|
||||
#define EXCEPT(nr) vector_ ## nr: pushq $0; pushq $nr; jmp except
|
||||
|
||||
/*
|
||||
* When we arrive at 'except' from one of the EXCEPT(X) stubs,
|
||||
* we're on the exception stack with irqs unlocked (or the trampoline stack
|
||||
* with irqs locked if KPTI is enabled) and it contains:
|
||||
*
|
||||
* SS
|
||||
* RSP
|
||||
* RFLAGS
|
||||
* CS
|
||||
* RIP
|
||||
* Error Code if pushed by CPU, else 0
|
||||
* Vector number <- RSP points here
|
||||
*
|
||||
*/
|
||||
|
||||
except: /*
|
||||
* finish struct NANO_ESF on stack. 'vector' .. 'ss' are
|
||||
* already there from hardware trap and EXCEPT_*() stub.
|
||||
*/
|
||||
|
||||
pushq %r15
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
/* Swap GS register values if we came in from user mode */
|
||||
testb $0x3, 24(%rsp)
|
||||
/* Swap GS register values and page tables if we came from user mode */
|
||||
testb $0x3, 32(%rsp)
|
||||
jz 1f
|
||||
swapgs
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Load kernel's page table */
|
||||
movq $z_x86_kernel_ptables, %r15
|
||||
movq %r15, %cr3
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
1:
|
||||
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
|
||||
/* swapgs variant of Spectre V1. Disable speculation past this point */
|
||||
lfence
|
||||
#endif /* CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION */
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Save old trampoline stack pointer in R15 */
|
||||
movq %rsp, %r15
|
||||
|
||||
/* Switch to the exception stack */
|
||||
movq %gs:__x86_tss64_t_ist7_OFFSET, %rsp
|
||||
|
||||
/* Transplant trampoline stack contents */
|
||||
pushq 56(%r15) /* SS */
|
||||
pushq 48(%r15) /* RSP */
|
||||
pushq 40(%r15) /* RFLAGS */
|
||||
pushq 32(%r15) /* CS */
|
||||
pushq 24(%r15) /* RIP */
|
||||
pushq 16(%r15) /* Error code */
|
||||
pushq 8(%r15) /* Vector */
|
||||
pushq (%r15) /* Stashed R15 */
|
||||
movq $0, (%r15) /* Cover our tracks */
|
||||
|
||||
/* We're done, it's safe to re-enable interrupts. */
|
||||
sti
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
pushq %r15
|
||||
|
||||
subq $X86_FXSAVE_SIZE, %rsp
|
||||
fxsave (%rsp)
|
||||
pushq %r14
|
||||
|
@ -359,7 +420,11 @@ except: /*
|
|||
testb $0x3, 8(%rsp)
|
||||
jz 1f
|
||||
cli
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
jmp z_x86_trampoline_to_user
|
||||
#else
|
||||
swapgs
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
1:
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
|
||||
|
@ -381,7 +446,8 @@ EXCEPT(Z_X86_OOPS_VECTOR);
|
|||
|
||||
/*
|
||||
* When we arrive at 'irq' from one of the IRQ(X) stubs,
|
||||
* we're on the "freshest" IRQ stack and it contains:
|
||||
* we're on the "freshest" IRQ stack (or the trampoline stack if we came from
|
||||
* user mode and KPTI is enabled) and it contains:
|
||||
*
|
||||
* SS
|
||||
* RSP
|
||||
|
@ -389,25 +455,48 @@ EXCEPT(Z_X86_OOPS_VECTOR);
|
|||
* CS
|
||||
* RIP
|
||||
* (vector number - IV_IRQS) <-- RSP points here
|
||||
* RSI <-- we push this on entry
|
||||
*/
|
||||
|
||||
.globl x86_irq_funcs /* see irq_manage.c .. */
|
||||
.globl x86_irq_args /* .. for these definitions */
|
||||
|
||||
irq:
|
||||
pushq %rsi
|
||||
|
||||
#ifdef CONFIG_USERSPACE
|
||||
/* Swap GS register values if we came in from user mode */
|
||||
testb $0x3, 16(%rsp)
|
||||
testb $0x3, 24(%rsp)
|
||||
jz 1f
|
||||
swapgs
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Load kernel's page table */
|
||||
movq $z_x86_kernel_ptables, %rsi
|
||||
movq %rsi, %cr3
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
1:
|
||||
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
|
||||
/* swapgs variant of Spectre V1. Disable speculation past this point */
|
||||
lfence
|
||||
#endif /* CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION */
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Save old trampoline stack pointer in RSI */
|
||||
movq %rsp, %rsi
|
||||
|
||||
/* Switch to the interrupt stack */
|
||||
movq %gs:__x86_tss64_t_ist1_OFFSET, %rsp
|
||||
|
||||
/* Transplant trampoline stack contents */
|
||||
pushq 48(%rsi) /* SS */
|
||||
pushq 40(%rsi) /* RSP */
|
||||
pushq 32(%rsi) /* RFLAGS */
|
||||
pushq 24(%rsi) /* CS */
|
||||
pushq 16(%rsi) /* RIP */
|
||||
pushq 8(%rsi) /* Vector */
|
||||
pushq (%rsi) /* Stashed RSI value */
|
||||
movq $0, (%rsi) /* Cover our tracks, stashed RSI might be sensitive */
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
pushq %rsi
|
||||
|
||||
movq %gs:__x86_tss64_t_cpu_OFFSET, %rsi
|
||||
|
||||
/*
|
||||
|
@ -564,84 +653,178 @@ IRQ(248); IRQ(249); IRQ(250); IRQ(251); IRQ(252); IRQ(253); IRQ(254); IRQ(255)
|
|||
* IDT.
|
||||
*/
|
||||
|
||||
#define TRAP 0x8f
|
||||
#define INTR 0x8e
|
||||
#define USER_INTR 0xee
|
||||
/* Descriptor type. Traps don't implicitly disable interrupts. User variants
|
||||
* can be invoked by software running in user mode (ring 3).
|
||||
*
|
||||
* For KPTI everything lands on the trampoline stack and we must get off of
|
||||
* it before re-enabling interrupts; use interrupt gates for everything.
|
||||
*/
|
||||
#define INTR 0x8e
|
||||
#define USER_INTR 0xee
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
#define TRAP INTR
|
||||
#define USER_TRAP USER_INTR /* KPTI: ring-3-invocable gates must be interrupt gates too */
|
||||
#else
|
||||
#define TRAP 0x8f
|
||||
#define USER_TRAP 0xef
|
||||
#endif
|
||||
|
||||
#define IDT(nr, type, ist) \
|
||||
.word vector_ ## nr, X86_KERNEL_CS; \
|
||||
.byte ist, type; \
|
||||
.word 0, 0, 0, 0, 0
|
||||
|
||||
/* Which IST entry in TSS to use for automatic stack switching, or 0 if
|
||||
* no automatic switch is to take place. Stack page must be present in
|
||||
* the current page tables, if KPTI is on only the trampoline stack and
|
||||
* the current user stack can be accessed.
|
||||
*/
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Everything lands on ist2, which is set to the trampoline stack.
|
||||
* Interrupt/exception entry updates page tables and manually switches to
|
||||
* the irq/exception stacks stored in ist1/ist7
|
||||
*/
|
||||
#define IRQ_STACK 2
|
||||
#define EXC_STACK 2
|
||||
#define BAD_STACK 2
|
||||
#else
|
||||
#define IRQ_STACK 1
|
||||
#define EXC_STACK 7
|
||||
#define BAD_STACK 7 /* Horrible things: NMIs, double faults, MCEs */
|
||||
#endif
|
||||
|
||||
.align 16
|
||||
idt:
|
||||
IDT( 0, TRAP, 7); IDT( 1, TRAP, 7); IDT( 2, TRAP, 7); IDT( 3, TRAP, 7)
|
||||
IDT( 4, TRAP, 7); IDT( 5, TRAP, 7); IDT( 6, TRAP, 7); IDT( 7, TRAP, 7)
|
||||
IDT( 8, TRAP, 7); IDT( 9, TRAP, 7); IDT( 10, TRAP, 7); IDT( 11, TRAP, 7)
|
||||
IDT( 12, TRAP, 7); IDT( 13, TRAP, 7); IDT( 14, TRAP, 7); IDT( 15, TRAP, 7)
|
||||
IDT( 16, TRAP, 7); IDT( 17, TRAP, 7); IDT( 18, TRAP, 7); IDT( 19, TRAP, 7)
|
||||
IDT( 20, TRAP, 7); IDT( 21, TRAP, 7); IDT( 22, TRAP, 7); IDT( 23, TRAP, 7)
|
||||
IDT( 24, TRAP, 7); IDT( 25, TRAP, 7); IDT( 26, TRAP, 7); IDT( 27, TRAP, 7)
|
||||
IDT( 28, TRAP, 7); IDT( 29, TRAP, 7); IDT( 30, TRAP, 7); IDT( 31, TRAP, 7)
|
||||
IDT( 0, TRAP, EXC_STACK); IDT( 1, TRAP, EXC_STACK)
|
||||
IDT( 2, TRAP, BAD_STACK); IDT( 3, TRAP, EXC_STACK)
|
||||
IDT( 4, TRAP, EXC_STACK); IDT( 5, TRAP, EXC_STACK)
|
||||
IDT( 6, TRAP, EXC_STACK); IDT( 7, TRAP, EXC_STACK)
|
||||
IDT( 8, TRAP, BAD_STACK); IDT( 9, TRAP, EXC_STACK)
|
||||
IDT( 10, TRAP, EXC_STACK); IDT( 11, TRAP, EXC_STACK)
|
||||
IDT( 12, TRAP, EXC_STACK); IDT( 13, TRAP, EXC_STACK)
|
||||
IDT( 14, TRAP, EXC_STACK); IDT( 15, TRAP, EXC_STACK)
|
||||
IDT( 16, TRAP, EXC_STACK); IDT( 17, TRAP, EXC_STACK)
|
||||
IDT( 18, TRAP, BAD_STACK); IDT( 19, TRAP, EXC_STACK)
|
||||
IDT( 20, TRAP, EXC_STACK); IDT( 21, TRAP, EXC_STACK)
|
||||
IDT( 22, TRAP, EXC_STACK); IDT( 23, TRAP, EXC_STACK)
|
||||
IDT( 24, TRAP, EXC_STACK); IDT( 25, TRAP, EXC_STACK)
|
||||
IDT( 26, TRAP, EXC_STACK); IDT( 27, TRAP, EXC_STACK)
|
||||
IDT( 28, TRAP, EXC_STACK); IDT( 29, TRAP, EXC_STACK)
|
||||
IDT( 30, TRAP, EXC_STACK); IDT( 31, TRAP, EXC_STACK)
|
||||
|
||||
/* Oops vector can be invoked from Ring 3 and runs on exception stack */
|
||||
IDT(Z_X86_OOPS_VECTOR, USER_INTR, 7);
|
||||
IDT( 33, INTR, 1); IDT( 34, INTR, 1); IDT( 35, INTR, 1)
|
||||
IDT( 36, INTR, 1); IDT( 37, INTR, 1); IDT( 38, INTR, 1); IDT( 39, INTR, 1)
|
||||
IDT( 40, INTR, 1); IDT( 41, INTR, 1); IDT( 42, INTR, 1); IDT( 43, INTR, 1)
|
||||
IDT( 44, INTR, 1); IDT( 45, INTR, 1); IDT( 46, INTR, 1); IDT( 47, INTR, 1)
|
||||
IDT( 48, INTR, 1); IDT( 49, INTR, 1); IDT( 50, INTR, 1); IDT( 51, INTR, 1)
|
||||
IDT( 52, INTR, 1); IDT( 53, INTR, 1); IDT( 54, INTR, 1); IDT( 55, INTR, 1)
|
||||
IDT( 56, INTR, 1); IDT( 57, INTR, 1); IDT( 58, INTR, 1); IDT( 59, INTR, 1)
|
||||
IDT( 60, INTR, 1); IDT( 61, INTR, 1); IDT( 62, INTR, 1); IDT( 63, INTR, 1)
|
||||
IDT( 64, INTR, 1); IDT( 65, INTR, 1); IDT( 66, INTR, 1); IDT( 67, INTR, 1)
|
||||
IDT( 68, INTR, 1); IDT( 69, INTR, 1); IDT( 70, INTR, 1); IDT( 71, INTR, 1)
|
||||
IDT( 72, INTR, 1); IDT( 73, INTR, 1); IDT( 74, INTR, 1); IDT( 75, INTR, 1)
|
||||
IDT( 76, INTR, 1); IDT( 77, INTR, 1); IDT( 78, INTR, 1); IDT( 79, INTR, 1)
|
||||
IDT( 80, INTR, 1); IDT( 81, INTR, 1); IDT( 82, INTR, 1); IDT( 83, INTR, 1)
|
||||
IDT( 84, INTR, 1); IDT( 85, INTR, 1); IDT( 86, INTR, 1); IDT( 87, INTR, 1)
|
||||
IDT( 88, INTR, 1); IDT( 89, INTR, 1); IDT( 90, INTR, 1); IDT( 91, INTR, 1)
|
||||
IDT( 92, INTR, 1); IDT( 93, INTR, 1); IDT( 94, INTR, 1); IDT( 95, INTR, 1)
|
||||
IDT( 96, INTR, 1); IDT( 97, INTR, 1); IDT( 98, INTR, 1); IDT( 99, INTR, 1)
|
||||
IDT(100, INTR, 1); IDT(101, INTR, 1); IDT(102, INTR, 1); IDT(103, INTR, 1)
|
||||
IDT(104, INTR, 1); IDT(105, INTR, 1); IDT(106, INTR, 1); IDT(107, INTR, 1)
|
||||
IDT(108, INTR, 1); IDT(109, INTR, 1); IDT(110, INTR, 1); IDT(111, INTR, 1)
|
||||
IDT(112, INTR, 1); IDT(113, INTR, 1); IDT(114, INTR, 1); IDT(115, INTR, 1)
|
||||
IDT(116, INTR, 1); IDT(117, INTR, 1); IDT(118, INTR, 1); IDT(119, INTR, 1)
|
||||
IDT(120, INTR, 1); IDT(121, INTR, 1); IDT(122, INTR, 1); IDT(123, INTR, 1)
|
||||
IDT(124, INTR, 1); IDT(125, INTR, 1); IDT(126, INTR, 1); IDT(127, INTR, 1)
|
||||
IDT(128, INTR, 1); IDT(129, INTR, 1); IDT(130, INTR, 1); IDT(131, INTR, 1)
|
||||
IDT(132, INTR, 1); IDT(133, INTR, 1); IDT(134, INTR, 1); IDT(135, INTR, 1)
|
||||
IDT(136, INTR, 1); IDT(137, INTR, 1); IDT(138, INTR, 1); IDT(139, INTR, 1)
|
||||
IDT(140, INTR, 1); IDT(141, INTR, 1); IDT(142, INTR, 1); IDT(143, INTR, 1)
|
||||
IDT(144, INTR, 1); IDT(145, INTR, 1); IDT(146, INTR, 1); IDT(147, INTR, 1)
|
||||
IDT(148, INTR, 1); IDT(149, INTR, 1); IDT(150, INTR, 1); IDT(151, INTR, 1)
|
||||
IDT(152, INTR, 1); IDT(153, INTR, 1); IDT(154, INTR, 1); IDT(155, INTR, 1)
|
||||
IDT(156, INTR, 1); IDT(157, INTR, 1); IDT(158, INTR, 1); IDT(159, INTR, 1)
|
||||
IDT(160, INTR, 1); IDT(161, INTR, 1); IDT(162, INTR, 1); IDT(163, INTR, 1)
|
||||
IDT(164, INTR, 1); IDT(165, INTR, 1); IDT(166, INTR, 1); IDT(167, INTR, 1)
|
||||
IDT(168, INTR, 1); IDT(169, INTR, 1); IDT(170, INTR, 1); IDT(171, INTR, 1)
|
||||
IDT(172, INTR, 1); IDT(173, INTR, 1); IDT(174, INTR, 1); IDT(175, INTR, 1)
|
||||
IDT(176, INTR, 1); IDT(177, INTR, 1); IDT(178, INTR, 1); IDT(179, INTR, 1)
|
||||
IDT(180, INTR, 1); IDT(181, INTR, 1); IDT(182, INTR, 1); IDT(183, INTR, 1)
|
||||
IDT(184, INTR, 1); IDT(185, INTR, 1); IDT(186, INTR, 1); IDT(187, INTR, 1)
|
||||
IDT(188, INTR, 1); IDT(189, INTR, 1); IDT(190, INTR, 1); IDT(191, INTR, 1)
|
||||
IDT(192, INTR, 1); IDT(193, INTR, 1); IDT(194, INTR, 1); IDT(195, INTR, 1)
|
||||
IDT(196, INTR, 1); IDT(197, INTR, 1); IDT(198, INTR, 1); IDT(199, INTR, 1)
|
||||
IDT(200, INTR, 1); IDT(201, INTR, 1); IDT(202, INTR, 1); IDT(203, INTR, 1)
|
||||
IDT(204, INTR, 1); IDT(205, INTR, 1); IDT(206, INTR, 1); IDT(207, INTR, 1)
|
||||
IDT(208, INTR, 1); IDT(209, INTR, 1); IDT(210, INTR, 1); IDT(211, INTR, 1)
|
||||
IDT(212, INTR, 1); IDT(213, INTR, 1); IDT(214, INTR, 1); IDT(215, INTR, 1)
|
||||
IDT(216, INTR, 1); IDT(217, INTR, 1); IDT(218, INTR, 1); IDT(219, INTR, 1)
|
||||
IDT(220, INTR, 1); IDT(221, INTR, 1); IDT(222, INTR, 1); IDT(223, INTR, 1)
|
||||
IDT(224, INTR, 1); IDT(225, INTR, 1); IDT(226, INTR, 1); IDT(227, INTR, 1)
|
||||
IDT(228, INTR, 1); IDT(229, INTR, 1); IDT(230, INTR, 1); IDT(231, INTR, 1)
|
||||
IDT(232, INTR, 1); IDT(233, INTR, 1); IDT(234, INTR, 1); IDT(235, INTR, 1)
|
||||
IDT(236, INTR, 1); IDT(237, INTR, 1); IDT(238, INTR, 1); IDT(239, INTR, 1)
|
||||
IDT(240, INTR, 1); IDT(241, INTR, 1); IDT(242, INTR, 1); IDT(243, INTR, 1)
|
||||
IDT(244, INTR, 1); IDT(245, INTR, 1); IDT(246, INTR, 1); IDT(247, INTR, 1)
|
||||
IDT(248, INTR, 1); IDT(249, INTR, 1); IDT(250, INTR, 1); IDT(251, INTR, 1)
|
||||
IDT(252, INTR, 1); IDT(253, INTR, 1); IDT(254, INTR, 1); IDT(255, INTR, 1)
|
||||
/* Oops vector can be invoked from Ring 3 and runs on exception stack */
|
||||
IDT(Z_X86_OOPS_VECTOR, USER_INTR, EXC_STACK); IDT( 33, INTR, IRQ_STACK)
|
||||
IDT( 34, INTR, IRQ_STACK); IDT( 35, INTR, IRQ_STACK)
|
||||
IDT( 36, INTR, IRQ_STACK); IDT( 37, INTR, IRQ_STACK)
|
||||
IDT( 38, INTR, IRQ_STACK); IDT( 39, INTR, IRQ_STACK)
|
||||
IDT( 40, INTR, IRQ_STACK); IDT( 41, INTR, IRQ_STACK)
|
||||
IDT( 42, INTR, IRQ_STACK); IDT( 43, INTR, IRQ_STACK)
|
||||
IDT( 44, INTR, IRQ_STACK); IDT( 45, INTR, IRQ_STACK)
|
||||
IDT( 46, INTR, IRQ_STACK); IDT( 47, INTR, IRQ_STACK)
|
||||
IDT( 48, INTR, IRQ_STACK); IDT( 49, INTR, IRQ_STACK)
|
||||
IDT( 50, INTR, IRQ_STACK); IDT( 51, INTR, IRQ_STACK)
|
||||
IDT( 52, INTR, IRQ_STACK); IDT( 53, INTR, IRQ_STACK)
|
||||
IDT( 54, INTR, IRQ_STACK); IDT( 55, INTR, IRQ_STACK)
|
||||
IDT( 56, INTR, IRQ_STACK); IDT( 57, INTR, IRQ_STACK)
|
||||
IDT( 58, INTR, IRQ_STACK); IDT( 59, INTR, IRQ_STACK)
|
||||
IDT( 60, INTR, IRQ_STACK); IDT( 61, INTR, IRQ_STACK)
|
||||
IDT( 62, INTR, IRQ_STACK); IDT( 63, INTR, IRQ_STACK)
|
||||
IDT( 64, INTR, IRQ_STACK); IDT( 65, INTR, IRQ_STACK)
|
||||
IDT( 66, INTR, IRQ_STACK); IDT( 67, INTR, IRQ_STACK)
|
||||
IDT( 68, INTR, IRQ_STACK); IDT( 69, INTR, IRQ_STACK)
|
||||
IDT( 70, INTR, IRQ_STACK); IDT( 71, INTR, IRQ_STACK)
|
||||
IDT( 72, INTR, IRQ_STACK); IDT( 73, INTR, IRQ_STACK)
|
||||
IDT( 74, INTR, IRQ_STACK); IDT( 75, INTR, IRQ_STACK)
|
||||
IDT( 76, INTR, IRQ_STACK); IDT( 77, INTR, IRQ_STACK)
|
||||
IDT( 78, INTR, IRQ_STACK); IDT( 79, INTR, IRQ_STACK)
|
||||
IDT( 80, INTR, IRQ_STACK); IDT( 81, INTR, IRQ_STACK)
|
||||
IDT( 82, INTR, IRQ_STACK); IDT( 83, INTR, IRQ_STACK)
|
||||
IDT( 84, INTR, IRQ_STACK); IDT( 85, INTR, IRQ_STACK)
|
||||
IDT( 86, INTR, IRQ_STACK); IDT( 87, INTR, IRQ_STACK)
|
||||
IDT( 88, INTR, IRQ_STACK); IDT( 89, INTR, IRQ_STACK)
|
||||
IDT( 90, INTR, IRQ_STACK); IDT( 91, INTR, IRQ_STACK)
|
||||
IDT( 92, INTR, IRQ_STACK); IDT( 93, INTR, IRQ_STACK)
|
||||
IDT( 94, INTR, IRQ_STACK); IDT( 95, INTR, IRQ_STACK)
|
||||
IDT( 96, INTR, IRQ_STACK); IDT( 97, INTR, IRQ_STACK)
|
||||
IDT( 98, INTR, IRQ_STACK); IDT( 99, INTR, IRQ_STACK)
|
||||
IDT(100, INTR, IRQ_STACK); IDT(101, INTR, IRQ_STACK)
|
||||
IDT(102, INTR, IRQ_STACK); IDT(103, INTR, IRQ_STACK)
|
||||
IDT(104, INTR, IRQ_STACK); IDT(105, INTR, IRQ_STACK)
|
||||
IDT(106, INTR, IRQ_STACK); IDT(107, INTR, IRQ_STACK)
|
||||
IDT(108, INTR, IRQ_STACK); IDT(109, INTR, IRQ_STACK)
|
||||
IDT(110, INTR, IRQ_STACK); IDT(111, INTR, IRQ_STACK)
|
||||
IDT(112, INTR, IRQ_STACK); IDT(113, INTR, IRQ_STACK)
|
||||
IDT(114, INTR, IRQ_STACK); IDT(115, INTR, IRQ_STACK)
|
||||
IDT(116, INTR, IRQ_STACK); IDT(117, INTR, IRQ_STACK)
|
||||
IDT(118, INTR, IRQ_STACK); IDT(119, INTR, IRQ_STACK)
|
||||
IDT(120, INTR, IRQ_STACK); IDT(121, INTR, IRQ_STACK)
|
||||
IDT(122, INTR, IRQ_STACK); IDT(123, INTR, IRQ_STACK)
|
||||
IDT(124, INTR, IRQ_STACK); IDT(125, INTR, IRQ_STACK)
|
||||
IDT(126, INTR, IRQ_STACK); IDT(127, INTR, IRQ_STACK)
|
||||
IDT(128, INTR, IRQ_STACK); IDT(129, INTR, IRQ_STACK)
|
||||
IDT(130, INTR, IRQ_STACK); IDT(131, INTR, IRQ_STACK)
|
||||
IDT(132, INTR, IRQ_STACK); IDT(133, INTR, IRQ_STACK)
|
||||
IDT(134, INTR, IRQ_STACK); IDT(135, INTR, IRQ_STACK)
|
||||
IDT(136, INTR, IRQ_STACK); IDT(137, INTR, IRQ_STACK)
|
||||
IDT(138, INTR, IRQ_STACK); IDT(139, INTR, IRQ_STACK)
|
||||
IDT(140, INTR, IRQ_STACK); IDT(141, INTR, IRQ_STACK)
|
||||
IDT(142, INTR, IRQ_STACK); IDT(143, INTR, IRQ_STACK)
|
||||
IDT(144, INTR, IRQ_STACK); IDT(145, INTR, IRQ_STACK)
|
||||
IDT(146, INTR, IRQ_STACK); IDT(147, INTR, IRQ_STACK)
|
||||
IDT(148, INTR, IRQ_STACK); IDT(149, INTR, IRQ_STACK)
|
||||
IDT(150, INTR, IRQ_STACK); IDT(151, INTR, IRQ_STACK)
|
||||
IDT(152, INTR, IRQ_STACK); IDT(153, INTR, IRQ_STACK)
|
||||
IDT(154, INTR, IRQ_STACK); IDT(155, INTR, IRQ_STACK)
|
||||
IDT(156, INTR, IRQ_STACK); IDT(157, INTR, IRQ_STACK)
|
||||
IDT(158, INTR, IRQ_STACK); IDT(159, INTR, IRQ_STACK)
|
||||
IDT(160, INTR, IRQ_STACK); IDT(161, INTR, IRQ_STACK)
|
||||
IDT(162, INTR, IRQ_STACK); IDT(163, INTR, IRQ_STACK)
|
||||
IDT(164, INTR, IRQ_STACK); IDT(165, INTR, IRQ_STACK)
|
||||
IDT(166, INTR, IRQ_STACK); IDT(167, INTR, IRQ_STACK)
|
||||
IDT(168, INTR, IRQ_STACK); IDT(169, INTR, IRQ_STACK)
|
||||
IDT(170, INTR, IRQ_STACK); IDT(171, INTR, IRQ_STACK)
|
||||
IDT(172, INTR, IRQ_STACK); IDT(173, INTR, IRQ_STACK)
|
||||
IDT(174, INTR, IRQ_STACK); IDT(175, INTR, IRQ_STACK)
|
||||
IDT(176, INTR, IRQ_STACK); IDT(177, INTR, IRQ_STACK)
|
||||
IDT(178, INTR, IRQ_STACK); IDT(179, INTR, IRQ_STACK)
|
||||
IDT(180, INTR, IRQ_STACK); IDT(181, INTR, IRQ_STACK)
|
||||
IDT(182, INTR, IRQ_STACK); IDT(183, INTR, IRQ_STACK)
|
||||
IDT(184, INTR, IRQ_STACK); IDT(185, INTR, IRQ_STACK)
|
||||
IDT(186, INTR, IRQ_STACK); IDT(187, INTR, IRQ_STACK)
|
||||
IDT(188, INTR, IRQ_STACK); IDT(189, INTR, IRQ_STACK)
|
||||
IDT(190, INTR, IRQ_STACK); IDT(191, INTR, IRQ_STACK)
|
||||
IDT(192, INTR, IRQ_STACK); IDT(193, INTR, IRQ_STACK)
|
||||
IDT(194, INTR, IRQ_STACK); IDT(195, INTR, IRQ_STACK)
|
||||
IDT(196, INTR, IRQ_STACK); IDT(197, INTR, IRQ_STACK)
|
||||
IDT(198, INTR, IRQ_STACK); IDT(199, INTR, IRQ_STACK)
|
||||
IDT(200, INTR, IRQ_STACK); IDT(201, INTR, IRQ_STACK)
|
||||
IDT(202, INTR, IRQ_STACK); IDT(203, INTR, IRQ_STACK)
|
||||
IDT(204, INTR, IRQ_STACK); IDT(205, INTR, IRQ_STACK)
|
||||
IDT(206, INTR, IRQ_STACK); IDT(207, INTR, IRQ_STACK)
|
||||
IDT(208, INTR, IRQ_STACK); IDT(209, INTR, IRQ_STACK)
|
||||
IDT(210, INTR, IRQ_STACK); IDT(211, INTR, IRQ_STACK)
|
||||
IDT(212, INTR, IRQ_STACK); IDT(213, INTR, IRQ_STACK)
|
||||
IDT(214, INTR, IRQ_STACK); IDT(215, INTR, IRQ_STACK)
|
||||
IDT(216, INTR, IRQ_STACK); IDT(217, INTR, IRQ_STACK)
|
||||
IDT(218, INTR, IRQ_STACK); IDT(219, INTR, IRQ_STACK)
|
||||
IDT(220, INTR, IRQ_STACK); IDT(221, INTR, IRQ_STACK)
|
||||
IDT(222, INTR, IRQ_STACK); IDT(223, INTR, IRQ_STACK)
|
||||
IDT(224, INTR, IRQ_STACK); IDT(225, INTR, IRQ_STACK)
|
||||
IDT(226, INTR, IRQ_STACK); IDT(227, INTR, IRQ_STACK)
|
||||
IDT(228, INTR, IRQ_STACK); IDT(229, INTR, IRQ_STACK)
|
||||
IDT(230, INTR, IRQ_STACK); IDT(231, INTR, IRQ_STACK)
|
||||
IDT(232, INTR, IRQ_STACK); IDT(233, INTR, IRQ_STACK)
|
||||
IDT(234, INTR, IRQ_STACK); IDT(235, INTR, IRQ_STACK)
|
||||
IDT(236, INTR, IRQ_STACK); IDT(237, INTR, IRQ_STACK)
|
||||
IDT(238, INTR, IRQ_STACK); IDT(239, INTR, IRQ_STACK)
|
||||
IDT(240, INTR, IRQ_STACK); IDT(241, INTR, IRQ_STACK)
|
||||
IDT(242, INTR, IRQ_STACK); IDT(243, INTR, IRQ_STACK)
|
||||
IDT(244, INTR, IRQ_STACK); IDT(245, INTR, IRQ_STACK)
|
||||
IDT(246, INTR, IRQ_STACK); IDT(247, INTR, IRQ_STACK)
|
||||
IDT(248, INTR, IRQ_STACK); IDT(249, INTR, IRQ_STACK)
|
||||
IDT(250, INTR, IRQ_STACK); IDT(251, INTR, IRQ_STACK)
|
||||
IDT(252, INTR, IRQ_STACK); IDT(253, INTR, IRQ_STACK)
|
||||
IDT(254, INTR, IRQ_STACK); IDT(255, INTR, IRQ_STACK)
|
||||
|
||||
idt48:
|
||||
.word (idt48 - idt - 1)
|
||||
|
@ -670,7 +853,7 @@ pdp: .long 0x00000083 /* 0x83 = 1GB, R/W, P */
|
|||
.long 0
|
||||
.fill 4064, 1, 0
|
||||
|
||||
.section .lodata,"ad"
|
||||
.section .gdt,"ad"
|
||||
|
||||
/*
|
||||
* GDT - a single GDT is shared by all threads (and, eventually, all CPUs).
|
||||
|
@ -727,6 +910,8 @@ gdt48:
|
|||
.word (gdt48 - gdt - 1)
|
||||
.long gdt
|
||||
|
||||
.section .lodata,"ad"
|
||||
|
||||
/*
|
||||
* Known-good stack for handling CPU exceptions.
|
||||
*/
|
||||
|
@ -756,3 +941,33 @@ _exception_stack2:
|
|||
_exception_stack3:
|
||||
.fill CONFIG_EXCEPTION_STACK_SIZE, 1, 0xAA
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
.section .trampolines,"ad"
|
||||
|
||||
.global z_x86_trampoline_stack
|
||||
.align 16
|
||||
z_x86_trampoline_stack:
|
||||
.fill Z_X86_TRAMPOLINE_STACK_SIZE, 1, 0xAA
|
||||
|
||||
#if CONFIG_MP_NUM_CPUS > 1
|
||||
.global z_x86_trampoline_stack1
|
||||
.align 16
|
||||
z_x86_trampoline_stack1:
|
||||
.fill Z_X86_TRAMPOLINE_STACK_SIZE, 1, 0xAA
|
||||
#endif
|
||||
|
||||
#if CONFIG_MP_NUM_CPUS > 2
|
||||
.global z_x86_trampoline_stack2
|
||||
.align 16
|
||||
z_x86_trampoline_stack2:
|
||||
.fill Z_X86_TRAMPOLINE_STACK_SIZE, 1, 0xAA
|
||||
#endif
|
||||
|
||||
#if CONFIG_MP_NUM_CPUS > 3
|
||||
.global z_x86_trampoline_stack3
|
||||
.align 16
|
||||
z_x86_trampoline_stack3:
|
||||
.fill Z_X86_TRAMPOLINE_STACK_SIZE, 1, 0xAA
|
||||
#endif
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <ksched.h>
|
||||
#include <kernel_structs.h>
|
||||
#include <kernel_internal.h>
|
||||
#include <offsets_short.h>
|
||||
|
||||
extern void x86_sse_init(struct k_thread *); /* in locore.S */
|
||||
|
||||
|
|
|
@ -8,6 +8,53 @@
|
|||
#include <offsets_short.h>
|
||||
#include <syscall.h>
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Copy interrupt return stack context to the trampoline stack, switch back
|
||||
* to the user page table, and only then 'iret'. We jump to this instead
|
||||
* of calling 'iret' if KPTI is turned on. This must be invoked with interrupts
|
||||
* locked.
|
||||
*
|
||||
* Stack layout is expected to be what 'iretq' expects, which is as follows:
|
||||
*
|
||||
* 32 SS
|
||||
* 24 RSP
|
||||
* 16 RFLAGS
|
||||
* 8 CS
|
||||
* 0 RIP
|
||||
*/
|
||||
.global z_x86_trampoline_to_user
|
||||
z_x86_trampoline_to_user:
|
||||
/* Stash RDI, need a free register */
|
||||
pushq %rdi
|
||||
|
||||
/* Store old stack pointer and switch to trampoline stack */
|
||||
movq %rsp, %rdi
|
||||
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
|
||||
|
||||
/* Copy context */
|
||||
pushq 40(%rdi) /* SS */
|
||||
pushq 32(%rdi) /* RSP */
|
||||
pushq 24(%rdi) /* RFLAGS */
|
||||
pushq 16(%rdi) /* CS */
|
||||
pushq 8(%rdi) /* RIP */
|
||||
xchgq %rdi, (%rdi) /* Exchange old rdi to restore it and put
|
||||
old stack pointer value in its old storage
|
||||
area */
|
||||
|
||||
/* Switch to thread's page table */
|
||||
pushq %rax
|
||||
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
|
||||
movq ___cpu_t_current_OFFSET(%rax), %rax
|
||||
movq _thread_offset_to_ptables(%rax), %rax
|
||||
movq %rax, %cr3
|
||||
popq %rax
|
||||
movq $0, -8(%rsp) /* Delete stashed RAX data */
|
||||
|
||||
/* Trampoline stack should have nothing sensitive in it at this point */
|
||||
swapgs
|
||||
iretq
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
|
||||
/* Landing site for 'syscall' instruction
|
||||
*
|
||||
|
@ -21,17 +68,39 @@
|
|||
z_x86_syscall_entry_stub:
|
||||
swapgs
|
||||
|
||||
/* Switch to the privilege mode stack pointer stored in
|
||||
* x86_tss64.psp and store the user mode stack pointer in
|
||||
* x86_tss64.usp, immediately pushing it once the stack switch
|
||||
* is done since this is a per-cpu and not per-thread area.
|
||||
*
|
||||
* This dance is necessary as upon entry we have no free registers
|
||||
* nor a stack we can push to.
|
||||
/* Save original stack pointer from user mode in memory, at the
|
||||
* moment we have no free registers or stack to save it to. This
|
||||
* eventually gets put on the stack before we re-enable interrupts
|
||||
* as this is a per-cpu and not per-thread area.
|
||||
*/
|
||||
movq %rsp, %gs:__x86_tss64_t_usp_OFFSET
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* We need to switch to the trampoline stack so that we can
|
||||
* switch to the kernel's page table
|
||||
*/
|
||||
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
|
||||
|
||||
/* Load kernel's page table */
|
||||
pushq %rax
|
||||
movq $z_x86_kernel_ptables, %rax
|
||||
movq %rax, %cr3
|
||||
popq %rax
|
||||
movq $0, -8(%rsp) /* Delete stashed RAX data */
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
/* Switch to the privilege mode stack pointer stored in
|
||||
* x86_tss64.psp
|
||||
*/
|
||||
movq %gs:__x86_tss64_t_psp_OFFSET, %rsp
|
||||
|
||||
/* We're now on the privilege mode stack; push the old user stack
|
||||
* pointer onto it
|
||||
*/
|
||||
pushq %gs:__x86_tss64_t_usp_OFFSET
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
movq $0, %gs:__x86_tss64_t_usp_OFFSET
|
||||
#endif
|
||||
|
||||
sti /* re-enable interrupts */
|
||||
|
||||
|
@ -103,6 +172,29 @@ _id_ok:
|
|||
addq $X86_FXSAVE_SIZE, %rsp
|
||||
popq %rdi
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Lock IRQs as we are using per-cpu memory areas and the
|
||||
* trampoline stack
|
||||
*/
|
||||
cli
|
||||
|
||||
/* Stash user stack pointer and switch to trampoline stack */
|
||||
popq %gs:__x86_tss64_t_usp_OFFSET
|
||||
movq %gs:__x86_tss64_t_ist2_OFFSET, %rsp
|
||||
|
||||
/* Switch to thread's page table */
|
||||
pushq %rax
|
||||
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
|
||||
movq ___cpu_t_current_OFFSET(%rax), %rax
|
||||
movq _thread_offset_to_ptables(%rax), %rax
|
||||
movq %rax, %cr3
|
||||
popq %rax
|
||||
movq $0, -8(%rsp) /* Delete stashed RAX data */
|
||||
|
||||
/* Restore saved user stack pointer */
|
||||
movq %gs:__x86_tss64_t_usp_OFFSET, %rsp
|
||||
movq $0, %gs:__x86_tss64_t_usp_OFFSET
|
||||
#else
|
||||
/* Restore user stack pointer */
|
||||
popq %rsp
|
||||
|
||||
|
@ -111,6 +203,8 @@ _id_ok:
|
|||
* 'swapgs' and 'sysretq'
|
||||
*/
|
||||
cli
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
swapgs
|
||||
sysretq
|
||||
|
||||
|
@ -221,7 +315,6 @@ z_x86_userspace_enter:
|
|||
movq $0, (%rsp) /* Now a debugger-friendly return address */
|
||||
|
||||
/* cleanse other registers */
|
||||
xorq %rax, %rax
|
||||
xorq %rbx, %rbx
|
||||
xorq %rbp, %rbp
|
||||
xorq %r12, %r12
|
||||
|
@ -230,5 +323,14 @@ z_x86_userspace_enter:
|
|||
xorq %r15, %r15
|
||||
|
||||
cli
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Switch to thread's page table. We have free registers so no need
|
||||
* to involve the trampoline stack.
|
||||
*/
|
||||
movq %gs:__x86_tss64_t_cpu_OFFSET, %rax
|
||||
movq ___cpu_t_current_OFFSET(%rax), %rax
|
||||
movq _thread_offset_to_ptables(%rax), %rax
|
||||
movq %rax, %cr3
|
||||
#endif
|
||||
swapgs
|
||||
sysretq
|
||||
|
|
|
@ -26,14 +26,18 @@ GEN_OFFSET_SYM(_thread_arch_t, sse);
|
|||
#ifdef CONFIG_USERSPACE
|
||||
GEN_OFFSET_SYM(_thread_arch_t, ss);
|
||||
GEN_OFFSET_SYM(_thread_arch_t, cs);
|
||||
#endif
|
||||
GEN_OFFSET_SYM(_thread_arch_t, psp);
|
||||
GEN_OFFSET_SYM(_thread_arch_t, ptables);
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
|
||||
GEN_OFFSET_SYM(x86_tss64_t, ist1);
|
||||
GEN_OFFSET_SYM(x86_tss64_t, ist2);
|
||||
GEN_OFFSET_SYM(x86_tss64_t, ist7);
|
||||
GEN_OFFSET_SYM(x86_tss64_t, cpu);
|
||||
#ifdef CONFIG_USERSPACE
|
||||
GEN_OFFSET_SYM(x86_tss64_t, psp);
|
||||
GEN_OFFSET_SYM(x86_tss64_t, usp);
|
||||
#endif
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
GEN_ABSOLUTE_SYM(__X86_TSS64_SIZEOF, sizeof(x86_tss64_t));
|
||||
|
||||
GEN_OFFSET_SYM(x86_cpuboot_t, ready);
|
||||
|
|
|
@ -25,17 +25,6 @@ static inline void page_tables_set(struct x86_page_tables *ptables)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Set initial stack pointer for privilege mode elevations */
|
||||
static inline void set_initial_psp(char *psp)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
__asm__ volatile("movq %0, %%gs:__x86_tss64_t_psp_OFFSET\n\t"
|
||||
: : "r" (psp));
|
||||
#else
|
||||
_main_tss.esp0 = (uintptr_t)psp;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Update to the incoming thread's page table, and update the location of
|
||||
* the privilege elevation stack.
|
||||
*
|
||||
|
@ -49,16 +38,21 @@ static inline void set_initial_psp(char *psp)
|
|||
*
|
||||
* We don't need to update the privilege mode initial stack pointer either,
|
||||
* privilege elevation always lands on the trampoline stack and the irq/syscall
|
||||
* code has to manually transition off of it to the thread's kernel stack after
|
||||
* code has to manually transition off of it to the appropriate stack after
|
||||
* switching page tables.
|
||||
*/
|
||||
void z_x86_swap_update_page_tables(struct k_thread *incoming)
|
||||
{
|
||||
struct x86_page_tables *ptables;
|
||||
|
||||
#ifndef CONFIG_X86_64
|
||||
/* 64-bit uses syscall/sysret which switches stacks manually,
|
||||
* tss64.psp is updated unconditionally in __resume
|
||||
*/
|
||||
if ((incoming->base.user_options & K_USER) != 0) {
|
||||
set_initial_psp(incoming->arch.psp);
|
||||
_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Check first that we actually need to do this, since setting
|
||||
* CR3 involves an expensive full TLB flush.
|
||||
|
@ -87,34 +81,6 @@ FUNC_NORETURN static void drop_to_user(k_thread_entry_t user_entry,
|
|||
CODE_UNREACHABLE;
|
||||
}
|
||||
|
||||
/* Does the following:
|
||||
*
|
||||
* - Initialize per-thread page tables and update thread->arch.ptables to
|
||||
* point to them.
|
||||
* - Set thread->arch.psp to point to the initial stack pointer for user
|
||||
* mode privilege elevation for system calls; supervisor mode threads leave
|
||||
* this uninitialized.
|
||||
*/
|
||||
static void prepare_user_thread(struct k_thread *thread)
|
||||
{
|
||||
struct z_x86_thread_stack_header *header =
|
||||
(struct z_x86_thread_stack_header *)thread->stack_obj;
|
||||
|
||||
__ASSERT((thread->base.user_options & K_USER) != 0,
|
||||
"not a user thread");
|
||||
|
||||
/* Create and program into the MMU the per-thread page tables */
|
||||
z_x86_thread_pt_init(thread);
|
||||
|
||||
thread->arch.psp =
|
||||
header->privilege_stack + sizeof(header->privilege_stack);
|
||||
}
|
||||
|
||||
static void prepare_supervisor_thread(struct k_thread *thread)
|
||||
{
|
||||
thread->arch.ptables = &z_x86_kernel_ptables;
|
||||
}
|
||||
|
||||
/* Preparation steps needed for all threads if user mode is turned on.
|
||||
*
|
||||
* Returns the initial entry point to swap into.
|
||||
|
@ -122,12 +88,17 @@ static void prepare_supervisor_thread(struct k_thread *thread)
|
|||
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
|
||||
{
|
||||
void *initial_entry;
|
||||
struct z_x86_thread_stack_header *header =
|
||||
(struct z_x86_thread_stack_header *)thread->stack_obj;
|
||||
|
||||
thread->arch.psp =
|
||||
header->privilege_stack + sizeof(header->privilege_stack);
|
||||
|
||||
if ((thread->base.user_options & K_USER) != 0U) {
|
||||
prepare_user_thread(thread);
|
||||
z_x86_thread_pt_init(thread);
|
||||
initial_entry = drop_to_user;
|
||||
} else {
|
||||
prepare_supervisor_thread(thread);
|
||||
thread->arch.ptables = &z_x86_kernel_ptables;
|
||||
initial_entry = z_thread_entry;
|
||||
}
|
||||
|
||||
|
@ -137,7 +108,7 @@ void *z_x86_userspace_prepare_thread(struct k_thread *thread)
|
|||
FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
|
||||
void *p1, void *p2, void *p3)
|
||||
{
|
||||
prepare_user_thread(_current);
|
||||
z_x86_thread_pt_init(_current);
|
||||
|
||||
/* Apply memory domain configuration, if assigned. Threads that
|
||||
* started in user mode already had this done via z_setup_new_thread()
|
||||
|
|
|
@ -663,8 +663,13 @@ extern char z_shared_kernel_page_start[];
|
|||
|
||||
static inline bool is_within_system_ram(uintptr_t addr)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* FIXME: locore not included in CONFIG_SRAM_BASE_ADDRESS */
|
||||
return addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U));
|
||||
#else
|
||||
return (addr >= DT_PHYS_RAM_ADDR) &&
|
||||
(addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U)));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Ignored bit position at all levels */
|
||||
|
@ -758,9 +763,17 @@ static void add_mmu_region_page(struct x86_page_tables *ptables,
|
|||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
if (user_table && (flags & Z_X86_MMU_US) == 0 &&
|
||||
#ifdef CONFIG_X86_64
|
||||
addr >= (uintptr_t)&_lodata_start &&
|
||||
#endif
|
||||
addr != (uintptr_t)(&z_shared_kernel_page_start)) {
|
||||
/* All non-user accessible pages except the shared page
|
||||
* are marked non-present in the page table.
|
||||
*
|
||||
* For x86_64 we also make the locore text/rodata areas
|
||||
* present even though they don't have user mode access,
|
||||
* they contain necessary tables and program text for
|
||||
* successfully handling exceptions and interrupts.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -36,4 +36,8 @@ extern u8_t x86_cpu_loapics[]; /* CPU logical ID -> local APIC ID */
|
|||
|
||||
#endif /* _ASMLANGUAGE */
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
#define Z_X86_TRAMPOLINE_STACK_SIZE 128
|
||||
#endif
|
||||
|
||||
#endif /* ZEPHYR_ARCH_X86_INCLUDE_INTEL64_KERNEL_ARCH_DATA_H_ */
|
||||
|
|
|
@ -11,5 +11,4 @@ config BOARD_QEMU_X86_64
|
|||
bool "QEMU x86_64"
|
||||
depends on SOC_IA32
|
||||
select QEMU_TARGET
|
||||
select X86_NO_MELTDOWN # Until KPTI is enabled, allow testing
|
||||
select X86_64
|
||||
|
|
|
@ -36,8 +36,35 @@ SECTIONS
|
|||
*(.lorodata)
|
||||
MMU_PAGE_ALIGN
|
||||
_lodata_start = .;
|
||||
*(.tss)
|
||||
|
||||
*(.lodata)
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
/* Special page containing supervisor data that is still mapped in
|
||||
* user mode page tables. GDT, TSSes, trampoline stack, and
|
||||
* any LDT must go here as they always must live in a page that is
|
||||
* marked 'present'. Still not directly user accessible, but
|
||||
* no sensitive data should be here as Meltdown exploits may read it.
|
||||
*
|
||||
* On x86-64 the IDT is in rodata and doesn't need to be in the
|
||||
* trampoline page.
|
||||
*/
|
||||
MMU_PAGE_ALIGN
|
||||
z_shared_kernel_page_start = .;
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
*(.tss)
|
||||
*(.gdt)
|
||||
|
||||
#ifdef CONFIG_X86_KPTI
|
||||
*(.trampolines)
|
||||
MMU_PAGE_ALIGN
|
||||
z_shared_kernel_page_end = .;
|
||||
|
||||
ASSERT(z_shared_kernel_page_end - z_shared_kernel_page_start == 4096,
|
||||
"shared kernel area is not one memory page");
|
||||
#endif /* CONFIG_X86_KPTI */
|
||||
|
||||
MMU_PAGE_ALIGN
|
||||
_lodata_end = .;
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ struct x86_tss64 {
|
|||
|
||||
/* Storage area for user mode stack pointer when doing a syscall */
|
||||
char *usp;
|
||||
#endif
|
||||
#endif /* CONFIG_USERSPACE */
|
||||
} __packed __aligned(8);
|
||||
|
||||
typedef struct x86_tss64 x86_tss64_t;
|
||||
|
|
Loading…
Reference in a new issue