kernel/sched: Fix free-memory write when ISRs abort _current
After a k_thread_abort(), the resulting thread struct is documented as unused/free memory that may be re-used (for example, to respawn a new thread).

But in the special case of aborting the current thread from within an ISR, that wasn't quite happening. The scheduler cleanup would complete, but the architecture layer would still try to context switch away from the aborted thread on exit, and that can include writes to the now-reused thread struct! The specifics will depend on the architecture (some do a full context save on entry, most don't), but in the case of USE_SWITCH=y it will at the very least write the switch_handle field.

Fix this simply, with a per-cpu "switch dummy" thread struct for use as a target for context switches like this. There is some non-trivial memory cost to that; thread structs on many architectures are large.

Pleasingly, this also addresses a known deadlock on SMP: because the "spin in ISR" step now happens as the very last stage of k_thread_abort() handling, the existing scheduler lock works to serialize calls such that it's impossible for a cycle of threads to independently decide to spin on each other: at least one will see itself as "already aborting" and break the cycle.

Fixes #64646

Signed-off-by: Andy Ross <andyross@google.com>
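For illustration only (a sketch, not part of this commit or its tests): the scenario above can be reproduced from application code by aborting _current from ISR context and then respawning into the same struct k_thread. It assumes a Zephyr app with CONFIG_IRQ_OFFLOAD=y; names such as worker_fn and abort_self_in_isr are invented for the example.

#include <zephyr/kernel.h>
#include <zephyr/irq_offload.h>

#define WORKER_STACK_SIZE 1024

static struct k_thread worker;   /* this struct is deliberately reused below */
K_THREAD_STACK_DEFINE(worker_stack, WORKER_STACK_SIZE);

/* Runs in ISR context via irq_offload(): aborts the interrupted thread,
 * i.e. _current, which is exactly the edge case this commit handles.
 */
static void abort_self_in_isr(const void *arg)
{
	ARG_UNUSED(arg);
	k_thread_abort(k_current_get());
}

static void worker_fn(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);
	irq_offload(abort_self_in_isr, NULL); /* synthetic interrupt on this CPU */
	/* never reached: the thread is dead by the time the "ISR" exits */
}

int main(void)
{
	k_thread_create(&worker, worker_stack,
			K_THREAD_STACK_SIZEOF(worker_stack), worker_fn,
			NULL, NULL, NULL, K_PRIO_PREEMPT(0), 0, K_NO_WAIT);
	k_thread_join(&worker, K_FOREVER);

	/* Per the commit message, &worker is now free memory: respawning
	 * into it must not race with a late write made while the
	 * architecture layer switches away on interrupt exit.
	 */
	k_thread_create(&worker, worker_stack,
			K_THREAD_STACK_SIZEOF(worker_stack), worker_fn,
			NULL, NULL, NULL, K_PRIO_PREEMPT(0), 0, K_NO_WAIT);
	k_thread_join(&worker, K_FOREVER);
	return 0;
}

Before this change, with CONFIG_USE_SWITCH=y the interrupt-exit path could still write the first incarnation's switch_handle after the struct had been handed back for reuse; with a per-cpu _thread_dummies entry installed as _current during halt_thread(), that late write lands in the dummy instead.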
commit 61c70626a5
parent 93dc7e7438
kernel/include/ksched.h
@@ -37,6 +37,8 @@ BUILD_ASSERT(K_LOWEST_APPLICATION_THREAD_PRIO
 #define Z_ASSERT_VALID_PRIO(prio, entry_point) __ASSERT((prio) == -1, "")
 #endif /* CONFIG_MULTITHREADING */
 
+extern struct k_thread _thread_dummies[CONFIG_MP_MAX_NUM_CPUS];
+
 void z_sched_init(void);
 void z_move_thread_to_end_of_prio_q(struct k_thread *thread);
 void z_unpend_thread_no_timeout(struct k_thread *thread);
kernel/init.c
@@ -650,12 +650,7 @@ FUNC_NORETURN void z_cstart(void)
 	LOG_CORE_INIT();
 
 #if defined(CONFIG_MULTITHREADING)
-	/* Note: The z_ready_thread() call in prepare_multithreading() requires
-	 * a dummy thread even if CONFIG_ARCH_HAS_CUSTOM_SWAP_TO_MAIN=y
-	 */
-	struct k_thread dummy_thread;
-
-	z_dummy_thread_init(&dummy_thread);
+	z_dummy_thread_init(&_thread_dummies[0]);
 #endif /* CONFIG_MULTITHREADING */
 	/* do any necessary initialization of static devices */
 	z_device_state_init();
kernel/sched.c
@@ -30,6 +30,11 @@ extern struct k_thread *pending_current;
 
 struct k_spinlock _sched_spinlock;
 
+/* Storage to "complete" the context switch from an invalid/incomplete thread
+ * context (ex: exiting an ISR that aborted _current)
+ */
+__incoherent struct k_thread _thread_dummies[CONFIG_MP_MAX_NUM_CPUS];
+
 static void update_cache(int preempt_ok);
 static void halt_thread(struct k_thread *thread, uint8_t new_state);
 static void add_to_waitq_locked(struct k_thread *thread, _wait_q_t *wait_q);
@@ -423,8 +428,9 @@ void z_sched_start(struct k_thread *thread)
  * another CPU to catch the IPI we sent and halt. Note that we check
  * for ourselves being asynchronously halted first to prevent simple
  * deadlocks (but not complex ones involving cycles of 3+ threads!).
+ * Acts to release the provided lock before returning.
  */
-static k_spinlock_key_t thread_halt_spin(struct k_thread *thread, k_spinlock_key_t key)
+static void thread_halt_spin(struct k_thread *thread, k_spinlock_key_t key)
 {
 	if (is_halting(_current)) {
 		halt_thread(_current,
@@ -432,10 +438,11 @@ static k_spinlock_key_t thread_halt_spin(struct k_thread *thread, k_spinlock_key
 	}
 	k_spin_unlock(&_sched_spinlock, key);
 	while (is_halting(thread)) {
 		unsigned int k = arch_irq_lock();
 
 		arch_spin_relax(); /* Requires interrupts be masked */
 		arch_irq_unlock(k);
 	}
-	key = k_spin_lock(&_sched_spinlock);
-	z_sched_switch_spin(thread);
-	return key;
 }
 
 /* Shared handler for k_thread_{suspend,abort}(). Called with the
@@ -465,8 +472,7 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 	arch_sched_ipi();
 #endif
 	if (arch_is_in_isr()) {
-		key = thread_halt_spin(thread, key);
-		k_spin_unlock(&_sched_spinlock, key);
+		thread_halt_spin(thread, key);
 	} else {
 		add_to_waitq_locked(_current, wq);
 		z_swap(&_sched_spinlock, key);
@@ -480,6 +486,10 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 			k_spin_unlock(&_sched_spinlock, key);
 		}
 	}
+	/* NOTE: the scheduler lock has been released. Don't put
+	 * logic here, it's likely to be racy/deadlocky even if you
+	 * re-take the lock!
+	 */
 }
 
@@ -1279,6 +1289,8 @@ extern void thread_abort_hook(struct k_thread *thread);
  */
 static void halt_thread(struct k_thread *thread, uint8_t new_state)
 {
+	bool dummify = false;
+
 	/* We hold the lock, and the thread is known not to be running
 	 * anywhere.
 	 */
@@ -1295,6 +1307,16 @@ static void halt_thread(struct k_thread *thread, uint8_t new_state)
 		}
 		(void)z_abort_thread_timeout(thread);
 		unpend_all(&thread->join_queue);
+
+		/* Edge case: aborting _current from within an
+		 * ISR that preempted it requires clearing the
+		 * _current pointer so the upcoming context
+		 * switch doesn't clobber the now-freed
+		 * memory
+		 */
+		if (thread == _current && arch_is_in_isr()) {
+			dummify = true;
+		}
 	}
 #ifdef CONFIG_SMP
 	unpend_all(&thread->halt_queue);
@@ -1333,6 +1355,16 @@ static void halt_thread(struct k_thread *thread, uint8_t new_state)
 #ifdef CONFIG_THREAD_ABORT_NEED_CLEANUP
 		k_thread_abort_cleanup(thread);
 #endif /* CONFIG_THREAD_ABORT_NEED_CLEANUP */
+
+		/* Do this "set _current to dummy" step last so that
+		 * subsystems above can rely on _current being
+		 * unchanged. Disabled for posix as that arch
+		 * continues to use the _current pointer in its swap
+		 * code.
+		 */
+		if (dummify && !IS_ENABLED(CONFIG_ARCH_POSIX)) {
+			z_dummy_thread_init(&_thread_dummies[_current_cpu->id]);
+		}
 	}
 }
 
kernel/smp.c
@@ -108,7 +108,6 @@ static void wait_for_start_signal(atomic_t *start_flag)
 
 static inline void smp_init_top(void *arg)
 {
-	struct k_thread dummy_thread;
 	struct cpu_start_cb csc = arg ? *(struct cpu_start_cb *)arg : (struct cpu_start_cb){0};
 
 	/* Let start_cpu() know that this CPU has powered up. */
@@ -123,7 +122,7 @@ static inline void smp_init_top(void *arg)
 	/* Initialize the dummy thread struct so that
 	 * the scheduler can schedule actual threads to run.
 	 */
-	z_dummy_thread_init(&dummy_thread);
+	z_dummy_thread_init(&_thread_dummies[arch_curr_cpu()->id]);
 }
 
 #ifdef CONFIG_SYS_CLOCK_EXISTS