arch/xtensa: Invalidate bottom of outbound stacks

Both new thread creation and context switch had the same mistake in
cache management: the bottom of the stack (the "unused" region between
the lower memory bound and the live stack pointer) needs to be
invalidated before we switch away.  Otherwise any dirty lines we might
have left over can later be written back on top of the same stack while
the thread is running on another CPU and putting live data there.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Andy Ross, 2021-02-22 14:31:11 -08:00, committed by Anas Nashif
parent 64cf33952d
commit b28da4a3b7
2 changed files with 52 additions and 13 deletions
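
To make the scenario in the message above concrete, here is a minimal sketch (not part of the commit) of the intended split between flushing and invalidating on the outbound side. The helper name and its three parameters are invented for illustration; it only assumes that z_xtensa_cache_flush() and z_xtensa_cache_inv(), which appear in the hunks below, take a start address and a byte count.

/* Hypothetical helper: cohere the outbound stack before switching away.
 * "stack_bottom" is the lowest address of the stack region, "sp" is the
 * saved stack pointer (the switch handle), and "stack_top" is
 * stack_bottom plus the stack size.
 */
static void outbound_stack_cohere_sketch(char *stack_bottom, char *sp,
                                         char *stack_top)
{
        /* Live frames (sp .. stack_top) must reach memory so the next
         * CPU to resume this thread sees what we wrote.
         */
        z_xtensa_cache_flush(sp, stack_top - sp);

        /* The dead region (stack_bottom .. sp) may still hold dirty
         * lines from deeper call frames we left behind.  If they stay
         * in this CPU's cache they can be evicted later, landing on
         * top of live frames written by whichever CPU runs the thread
         * next, so they are dropped without a writeback.
         */
        z_xtensa_cache_inv(stack_bottom, sp - stack_bottom);
}

The second file below applies the same split inside arch_cohere_stacks(), with the extra wrinkle that during a direct arch_switch() the outbound stack is still in use, so only the area below the current SP (minus the 16-byte caller spill area) can be invalidated immediately.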


@@ -60,9 +60,6 @@ void *xtensa_init_stack(struct k_thread *thread, int *stack_top,
bsa[-9] = bsa;
ret = &bsa[-9];
-#ifdef CONFIG_KERNEL_COHERENCE
-z_xtensa_cache_flush(ret, (char *)stack_top - (char *)ret);
-#endif
return ret;
}
@@ -73,6 +70,11 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
thread->switch_handle = xtensa_init_stack(thread,
(int *)stack_ptr, entry,
p1, p2, p3);
+#ifdef CONFIG_KERNEL_COHERENCE
+__ASSERT((((size_t)stack) % XCHAL_DCACHE_LINESIZE) == 0, "");
+__ASSERT((((size_t)stack_ptr) % XCHAL_DCACHE_LINESIZE) == 0, "");
+z_xtensa_cache_flush_inv(stack, (char *)stack_ptr - (char *)stack);
+#endif
}
void z_irq_spurious(const void *arg)

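The hunk above moves the coherence work for a brand-new thread into arch_new_thread() and widens it to the whole stack object. Below is a rough sketch of the alignment requirement the two __ASSERT()s encode, using hypothetical names (stack_base, stack_end); the only assumption is that the Xtensa cache operations act on whole XCHAL_DCACHE_LINESIZE-byte lines.

/* Illustrative only: cache flush/invalidate instructions operate on whole
 * cache lines, so if either end of the stack region were not line aligned,
 * maintenance on the stack would also hit whatever data shares those
 * boundary lines.
 */
static bool stack_region_is_line_aligned(const char *stack_base,
                                         const char *stack_end)
{
        const size_t line = XCHAL_DCACHE_LINESIZE;

        return (((uintptr_t)stack_base % line) == 0) &&
               (((uintptr_t)stack_end % line) == 0);
}

A single z_xtensa_cache_flush_inv() over that aligned range then both pushes the freshly built switch frame out to memory and drops any stale lines this CPU may still hold for the rest of the new thread's stack.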

@@ -71,9 +71,9 @@ static inline bool arch_mem_coherent(void *ptr)
#endif
#ifdef CONFIG_KERNEL_COHERENCE
-static inline void arch_cohere_stacks(struct k_thread *old_thread,
-void *old_switch_handle,
-struct k_thread *new_thread)
+static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
+void *old_switch_handle,
+struct k_thread *new_thread)
{
size_t ostack = old_thread->stack_info.start;
size_t osz = old_thread->stack_info.size;
@@ -83,24 +83,61 @@ static inline void arch_cohere_stacks(struct k_thread *old_thread,
size_t nsz = new_thread->stack_info.size;
size_t nsp = (size_t) new_thread->switch_handle;
+/* The "live" area (the region between the switch handle,
+* which is the stack pointer, and the top of the stack
+* memory) of the inbound stack needs to be invalidated: it
+* may contain data that was modified on another CPU since the
+* last time this CPU ran the thread, and our cache may be
+* stale.
+*
+* The corresponding "dead area" of the inbound stack can be
+* ignored. We may have cached data in that region, but by
+* definition any unused stack memory will always be written
+* before being read (well, unless the code has an
+* uninitialized data error) so our stale cache will be
+* automatically overwritten as needed.
+*/
z_xtensa_cache_inv((void *)nsp, (nstack + nsz) - nsp);
-/* FIXME: dummy initializion threads don't have stack info set
-* up and explode the logic above. Find a way to get this
-* test out of the hot paths!
+/* Dummy threads appear at system initialization, but don't
+* have stack_info data and will never be saved. Ignore.
*/
if (old_thread->base.thread_state & _THREAD_DUMMY) {
return;
}
-/* In interrupt context, we have a valid frame already from
-* the interrupt entry code, but for arch_switch() that hasn't
-* happened yet. It will do the flush itself, we just have to
-* calculate the boundary for it.
+/* For the outbound thread, we obviously want to flush any data
+* in the live area (for the benefit of whichever CPU runs
+* this thread next). But we ALSO have to invalidate the dead
+* region of the stack. Those lines may have DIRTY data in
+* our own cache, and we cannot be allowed to write them back
+* later on top of the stack's legitimate owner!
+*
+* This work comes in two flavors. In interrupts, the
+* outgoing context has already been saved for us, so we can
+* do the flush right here. In direct context switches, we
+* are still using the stack, so we do the invalidate of the
+* bottom here (and flush the line containing SP to handle
+* the overlap). The remaining flush of the live region
+* happens in the assembly code once the context is pushed, up
+* to the stack top stashed in a special register.
*/
if (old_switch_handle != NULL) {
z_xtensa_cache_flush((void *)osp, (ostack + osz) - osp);
+z_xtensa_cache_inv((void *)ostack, osp - ostack);
} else {
+/* When in a switch, our current stack is the outbound
+* stack. Flush the single line containing the stack
+* bottom (which is live data) before invalidating
+* everything below that. Remember that the 16 bytes
+* below our SP are the calling function's spill area
+* and may be live too.
+*/
+__asm__ volatile("mov %0, a1" : "=r"(osp));
+osp -= 16;
+z_xtensa_cache_flush((void *)osp, 1);
+z_xtensa_cache_inv((void *)ostack, osp - ostack);
/* FIXME: hardcoding EXCSAVE3 is bad, should be
* configurable a-la XTENSA_KERNEL_CPU_PTR_SR.
*/