x86: properly align initial dummy thread
x86-32 thread objects require special alignment because they contain a buffer that is passed to the fxsave/fxrstor instructions. This failed when the dummy thread was created in a stack frame. Implement a custom swap to main for x86 that still uses a dummy thread, but places it in an unused, properly aligned part of the interrupt stack. Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
This commit is contained in:
parent
468efadd47
commit
d149909b03
|
@ -38,6 +38,7 @@ config X86
|
|||
select ARCH_IS_SET
|
||||
select ATOMIC_OPERATIONS_BUILTIN
|
||||
select HAS_DTS
|
||||
select ARCH_HAS_CUSTOM_SWAP_TO_MAIN if !X86_64
|
||||
help
|
||||
x86 architecture
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <kernel.h>
|
||||
#include <ksched.h>
|
||||
#include <arch/x86/mmustructs.h>
|
||||
#include <kswap.h>
|
||||
|
||||
/* forward declaration */
|
||||
|
||||
|
@ -115,3 +116,29 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
|
|||
#endif /* CONFIG_LAZY_FPU_SHARING */
|
||||
thread->arch.flags = 0;
|
||||
}
|
||||
|
||||
/* The core kernel code puts the dummy thread on the stack, which unfortunately
|
||||
* doesn't work for 32-bit x86 as k_thread objects must be aligned due to the
|
||||
* buffer within them fed to fxsave/fxrstor.
|
||||
*
|
||||
* Use some sufficiently aligned bytes in the lower memory of the interrupt
|
||||
* stack instead, otherwise the logic is more or less the same.
|
||||
*/
|
||||
/* x86-32 replacement for the generic switch-to-main routine: the dummy thread
 * is built inside the interrupt stack buffer rather than in the current stack
 * frame. None of the four parameters is referenced in this body.
 * NOTE(review): presumably the main thread has already been set up and made
 * runnable by the caller, so the swap below selects it - confirm.
 */
void arch_switch_to_main_thread(struct k_thread *main_thread,
|
||||
k_thread_stack_t *main_stack,
|
||||
size_t main_stack_size,
|
||||
k_thread_entry_t _main)
|
||||
{
|
||||
/* Carve the dummy thread out of interrupt stack 0: take the start of its
 * buffer and round that address up to a multiple of FP_REG_SET_ALIGN.
 */
struct k_thread *dummy_thread = (struct k_thread *)
|
||||
ROUND_UP(Z_THREAD_STACK_BUFFER(z_interrupt_stacks[0]),
|
||||
FP_REG_SET_ALIGN);
|
||||
|
||||
/* The rounding above aligns only the start of the struct; check that the
 * floating point save area member itself also lands on an
 * FP_REG_SET_ALIGN boundary.
 */
__ASSERT(((uintptr_t)(&dummy_thread->arch.preempFloatReg) %
|
||||
FP_REG_SET_ALIGN) == 0,
|
||||
"unaligned dummy thread %p float member %p",
|
||||
dummy_thread, &dummy_thread->arch.preempFloatReg);
|
||||
|
||||
/* NOTE(review): z_dummy_thread_init() presumably installs dummy_thread as
 * the current thread so that the swap has an outgoing context to save
 * into - confirm against the core kernel implementation.
 */
z_dummy_thread_init(dummy_thread);
|
||||
z_swap_unlocked();
|
||||
/* Execution is not expected to come back here after the swap. */
CODE_UNREACHABLE;
|
||||
}
|
||||
|
|
|
@ -26,12 +26,18 @@
|
|||
* since the 'fxsave' and 'fxrstor' instructions require this. In all other
|
||||
* cases a 4 byte boundary is sufficient.
|
||||
*/
|
||||
|
||||
/* FP_REG_SET_ALIGN: byte alignment applied to the floating point register
 * save area. 16 when FPU sharing is enabled together with SSE, 4 when FPU
 * sharing is enabled without SSE, and 1 when FPU sharing is disabled.
 */
#if defined(CONFIG_EAGER_FPU_SHARING) || defined(CONFIG_LAZY_FPU_SHARING)
|
||||
#ifdef CONFIG_SSE
|
||||
#define FP_REG_SET_ALIGN 16
|
||||
#else
|
||||
#define FP_REG_SET_ALIGN 4
|
||||
#endif /* CONFIG_SSE */
|
||||
#else
|
||||
/* Unused, no special alignment requirements, use default alignment for
|
||||
* char buffers on this arch
|
||||
*/
|
||||
#define FP_REG_SET_ALIGN 1
|
||||
#endif /* CONFIG_*_FPU_SHARING */
|
||||
|
||||
/*
|
||||
* Bits for _thread_arch.flags, see their use in intstub.S et al.
|
||||
|
@ -230,19 +236,6 @@ struct _thread_arch {
|
|||
unsigned excNestCount; /* nested exception count */
|
||||
#endif /* CONFIG_LAZY_FPU_SHARING */
|
||||
|
||||
/*
|
||||
* The location of all floating point related structures/fields MUST be
|
||||
* located at the end of struct k_thread. This way only the
|
||||
* threads that actually utilize non-integer capabilities need to
|
||||
* account for the increased memory required for storing FP state when
|
||||
* sizing stacks.
|
||||
*
|
||||
* Given that stacks "grow down" on IA-32, and the TCS is located
|
||||
* at the start of a thread's "workspace" memory, the stacks of
|
||||
* threads that do not utilize floating point instruction can
|
||||
* effectively consume the memory occupied by the 'tPreempFloatReg'
|
||||
* struct without ill effect.
|
||||
*/
|
||||
tPreempFloatReg preempFloatReg; /* volatile float register storage */
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue