x86: properly align initial dummy thread

x86-32 thread objects require special alignment since they contain a buffer that is passed to fxsave/fxrstor instructions. This fell over if the dummy thread was created in a stack frame. Implement a custom swap to main for x86 which still uses a dummy thread, but in an unused part of the interrupt stack with proper alignment. Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
2020-05-12 16:23:15 -07:00 · 2020-05-12 16:23:15 -07:00 · d149909b03
parent 468efadd47
commit d149909b03
3 changed files with 35 additions and 14 deletions
--- a/arch/Kconfig
+++ b/arch/Kconfig
@ -38,6 +38,7 @@ config X86
 	select ARCH_IS_SET
 	select ATOMIC_OPERATIONS_BUILTIN
 	select HAS_DTS
+	select ARCH_HAS_CUSTOM_SWAP_TO_MAIN if !X86_64
 	help
 	  x86 architecture

--- a/arch/x86/core/ia32/thread.c
+++ b/arch/x86/core/ia32/thread.c
@ -15,6 +15,7 @@
 #include <kernel.h>
 #include <ksched.h>
 #include <arch/x86/mmustructs.h>
+#include <kswap.h>

 /* forward declaration */

@ -115,3 +116,29 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 #endif /* CONFIG_LAZY_FPU_SHARING */
 	thread->arch.flags = 0;
 }
+
+/* The core kernel code puts the dummy thread on the stack, which unfortunately
+ * doesn't work for 32-bit x86 as k_thread objects must be aligned due to the
+ * buffer within them fed to fxsave/fxrstor.
+ *
+ * Use some sufficiently aligned bytes in the lower memory of the interrupt
+ * stack instead, otherwise the logic is more or less the same.
+ *
+ * The dummy thread object is placed at the address of the buffer of
+ * z_interrupt_stacks[0], rounded up to FP_REG_SET_ALIGN. ROUND_UP aligns the
+ * start of the k_thread object, while the __ASSERT below checks the address
+ * of its arch.preempFloatReg member.
+ *
+ * None of the four parameters (main_thread, main_stack, main_stack_size,
+ * _main) is referenced in the body below. The function is not expected to
+ * return: the z_swap_unlocked() call is followed by CODE_UNREACHABLE.
+ */
+void arch_switch_to_main_thread(struct k_thread *main_thread,
+				k_thread_stack_t *main_stack,
+				size_t main_stack_size,
+				k_thread_entry_t _main)
+{
+	struct k_thread *dummy_thread = (struct k_thread *)
+		ROUND_UP(Z_THREAD_STACK_BUFFER(z_interrupt_stacks[0]),
+			 FP_REG_SET_ALIGN); /* aligns the object's start */
+
+	__ASSERT(((uintptr_t)(&dummy_thread->arch.preempFloatReg) %
+		  FP_REG_SET_ALIGN) == 0, /* checks the member itself */
+		 "unaligned dummy thread %p float member %p",
+		 dummy_thread, &dummy_thread->arch.preempFloatReg);
+
+	z_dummy_thread_init(dummy_thread);
+	z_swap_unlocked(); /* not expected to return */
+	CODE_UNREACHABLE;
+}
--- a/include/arch/x86/ia32/thread.h
+++ b/include/arch/x86/ia32/thread.h
@ -26,12 +26,18 @@
 * since the 'fxsave' and 'fxrstor' instructions require this. In all other
 * cases a 4 byte boundary is sufficient.
 */
-
+#if defined(CONFIG_EAGER_FPU_SHARING) || defined(CONFIG_LAZY_FPU_SHARING)
 #ifdef CONFIG_SSE
 #define FP_REG_SET_ALIGN  16
 #else
 #define FP_REG_SET_ALIGN  4
 #endif /* CONFIG_SSE */
+#else
+/* Unused when neither FPU sharing option is enabled: no special alignment
+ * requirements, use default alignment for char buffers on this arch
+ */
+#define FP_REG_SET_ALIGN  1
+#endif /* CONFIG_EAGER_FPU_SHARING || CONFIG_LAZY_FPU_SHARING */

 /*
 * Bits for _thread_arch.flags, see their use in intstub.S et al.
@ -230,19 +236,6 @@ struct _thread_arch {
 	unsigned excNestCount; /* nested exception count */
 #endif /* CONFIG_LAZY_FPU_SHARING */

-	/*
-	 * The location of all floating point related structures/fields MUST be
-	 * located at the end of struct k_thread.  This way only the
-	 * threads that actually utilize non-integer capabilities need to
-	 * account for the increased memory required for storing FP state when
-	 * sizing stacks.
-	 *
-	 * Given that stacks "grow down" on IA-32, and the TCS is located
-	 * at the start of a thread's "workspace" memory, the stacks of
-	 * threads that do not utilize floating point instruction can
-	 * effectively consume the memory occupied by the 'tPreempFloatReg'
-	 * struct without ill effect.
-	 */
 	tPreempFloatReg preempFloatReg; /* volatile float register storage */
 };