x86: properly align initial dummy thread

x86-32 thread objects require special alignment since they
contain a buffer that is passed to fxsave/fxrstor instructions.
This fell over when the dummy thread was created in a stack frame,
where that alignment is not guaranteed.

Implement a custom swap to main for x86 which still uses a
dummy thread, but in an unused part of the interrupt stack
with proper alignment.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
Andrew Boie 2020-05-12 16:23:15 -07:00 committed by Carles Cufí
parent 468efadd47
commit d149909b03
3 changed files with 35 additions and 14 deletions

View file

@ -38,6 +38,7 @@ config X86
select ARCH_IS_SET
select ATOMIC_OPERATIONS_BUILTIN
select HAS_DTS
select ARCH_HAS_CUSTOM_SWAP_TO_MAIN if !X86_64
help
x86 architecture

View file

@ -15,6 +15,7 @@
#include <kernel.h>
#include <ksched.h>
#include <arch/x86/mmustructs.h>
#include <kswap.h>
/* forward declaration */
@ -115,3 +116,29 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
#endif /* CONFIG_LAZY_FPU_SHARING */
thread->arch.flags = 0;
}
/*
 * x86 (32-bit) implementation of the switch to the main thread, using a
 * dummy thread that lives in the interrupt stack.
 *
 * The core kernel code puts the dummy thread on the stack, which does not
 * work for 32-bit x86: k_thread objects must be aligned because the buffer
 * within them is fed to fxsave/fxrstor.
 *
 * Instead, carve the dummy thread out of suitably aligned bytes in the lower
 * memory of the first interrupt stack (z_interrupt_stacks[0]); otherwise the
 * logic is more or less the same as the core kernel's.
 *
 * The four parameters describe the main thread and belong to the
 * arch_switch_to_main_thread() interface, but none of them is referenced in
 * this body.
 * NOTE(review): presumably the caller has already made the main thread
 * runnable before calling this - confirm against the core kernel.
 *
 * Control is not expected to come back to this function (CODE_UNREACHABLE).
 */
void arch_switch_to_main_thread(struct k_thread *main_thread,
k_thread_stack_t *main_stack,
size_t main_stack_size,
k_thread_entry_t _main)
{
/* Start of the interrupt stack buffer, rounded up to FP_REG_SET_ALIGN. */
struct k_thread *dummy_thread = (struct k_thread *)
ROUND_UP(Z_THREAD_STACK_BUFFER(z_interrupt_stacks[0]),
FP_REG_SET_ALIGN);
/* Aligning the start of the object only helps if the float save area's
 * offset inside struct k_thread keeps it on the same boundary, so check
 * the member's address itself rather than the object's.
 */
__ASSERT(((uintptr_t)(&dummy_thread->arch.preempFloatReg) %
FP_REG_SET_ALIGN) == 0,
"unaligned dummy thread %p float member %p",
dummy_thread, &dummy_thread->arch.preempFloatReg);
/* Set up the dummy thread in the carved-out memory.
 * NOTE(review): presumably this makes it the current thread so the swap
 * below has somewhere to save the outgoing context - confirm against
 * z_dummy_thread_init().
 */
z_dummy_thread_init(dummy_thread);
/* Swap away from the dummy thread; execution should never resume here. */
z_swap_unlocked();
CODE_UNREACHABLE;
}

View file

@ -26,12 +26,18 @@
* since the 'fxsave' and 'fxrstor' instructions require this. In all other
* cases a 4 byte boundary is sufficient.
*/
#if !defined(CONFIG_EAGER_FPU_SHARING) && !defined(CONFIG_LAZY_FPU_SHARING)
/* Neither FPU sharing mode is enabled: the floating point save area is
 * unused, so it has no special alignment requirement. Fall back to the
 * default alignment for char buffers on this arch.
 */
#define FP_REG_SET_ALIGN 1
#elif defined(CONFIG_SSE)
/* FPU sharing with SSE: the save area is handed to fxsave/fxrstor, which
 * need it on a 16 byte boundary.
 */
#define FP_REG_SET_ALIGN 16
#else
/* FPU sharing without SSE: a 4 byte boundary is sufficient. */
#define FP_REG_SET_ALIGN 4
#endif /* CONFIG_*_FPU_SHARING / CONFIG_SSE */
/*
* Bits for _thread_arch.flags, see their use in intstub.S et al.
@ -230,19 +236,6 @@ struct _thread_arch {
unsigned excNestCount; /* nested exception count */
#endif /* CONFIG_LAZY_FPU_SHARING */
/*
* The location of all floating point related structures/fields MUST be
* located at the end of struct k_thread. This way only the
* threads that actually utilize non-integer capabilities need to
* account for the increased memory required for storing FP state when
* sizing stacks.
*
* Given that stacks "grow down" on IA-32, and the TCS is located
* at the start of a thread's "workspace" memory, the stacks of
* threads that do not utilize floating point instructions can
* effectively consume the memory occupied by the 'tPreempFloatReg'
* struct without ill effect.
*/
tPreempFloatReg preempFloatReg; /* volatile float register storage */
};