diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index ee6dc8f50e..ae0930b70e 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -116,6 +116,10 @@ struct _thread_base {
 	/* this thread's entry in a timeout queue */
 	struct _timeout timeout;
 #endif
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint64_t usage;
+#endif
 };
 
 typedef struct _thread_base _thread_base_t;
diff --git a/include/kernel_structs.h b/include/kernel_structs.h
index 5199f82b92..05d2ec309d 100644
--- a/include/kernel_structs.h
+++ b/include/kernel_structs.h
@@ -130,6 +130,10 @@ struct _cpu {
 	uint8_t swap_ok;
 #endif
 
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	uint32_t usage0;
+#endif
+
 	/* Per CPU architecture specifics */
 	struct _cpu_arch arch;
 };
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 26a078e873..7c09d4392f 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -377,6 +377,12 @@ config THREAD_MAX_NAME_LEN
 config INSTRUMENT_THREAD_SWITCHING
 	bool
 
+config SCHED_THREAD_USAGE
+	bool "Collect thread runtime usage"
+	depends on USE_SWITCH
+	help
+	  Alternate implementation of thread runtime cycle usage
+
 menuconfig THREAD_RUNTIME_STATS
 	bool "Thread runtime statistics"
 	select INSTRUMENT_THREAD_SWITCHING
diff --git a/kernel/include/ksched.h b/kernel/include/ksched.h
index 7d39825c52..f29e6c4166 100644
--- a/kernel/include/ksched.h
+++ b/kernel/include/ksched.h
@@ -363,4 +363,35 @@ static inline bool z_sched_wake_all(_wait_q_t *wait_q, int swap_retval,
 int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 		 _wait_q_t *wait_q, k_timeout_t timeout, void **data);
 
+
+/** @brief Halt thread cycle usage accounting.
+ *
+ * Halts the accumulation of thread cycle usage and adds the current
+ * total to the thread's counter. Called on context switch.
+ *
+ * Note that this function is idempotent. The core kernel code calls
+ * it at the end of interrupt handlers (because that is where we have
+ * a portable hook) where we are context switching, which will include
+ * any cycles spent in the ISR in the per-thread accounting. But
+ * architecture code can also call it earlier out of interrupt entry
+ * to improve measurement fidelity.
+ *
+ * This function assumes local interrupts are masked (so that the
+ * current CPU pointer and current thread are safe to modify), but
+ * requires no other synchronization. Architecture layers don't need
+ * to do anything more.
+ */
+void z_sched_usage_stop(void);
+
+void z_sched_usage_start(struct k_thread *thread);
+
+static inline void z_sched_usage_switch(struct k_thread *thread)
+{
+	ARG_UNUSED(thread);
+#ifdef CONFIG_SCHED_THREAD_USAGE
+	z_sched_usage_stop();
+	z_sched_usage_start(thread);
+#endif
+}
+
 #endif /* ZEPHYR_KERNEL_INCLUDE_KSCHED_H_ */
diff --git a/kernel/include/kswap.h b/kernel/include/kswap.h
index 917315d589..f43f194117 100644
--- a/kernel/include/kswap.h
+++ b/kernel/include/kswap.h
@@ -109,6 +109,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
 #ifdef CONFIG_TIMESLICING
 	z_reset_time_slice();
 #endif
+	z_sched_usage_switch(new_thread);
 
 #ifdef CONFIG_SMP
 	_current_cpu->swap_ok = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5eeb192268..32fb2341ac 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -937,6 +937,8 @@ void *z_get_next_switch_handle(void *interrupted)
 		}
 		new_thread = next_up();
 
+		z_sched_usage_switch(new_thread);
+
 		if (old_thread != new_thread) {
 			update_metairq_preempt(new_thread);
 			wait_for_switch(new_thread);
@@ -976,6 +978,7 @@ void *z_get_next_switch_handle(void *interrupted)
 	}
 	return ret;
 #else
+	z_sched_usage_switch(_kernel.ready_q.cache);
 	_current->switch_handle = interrupted;
 	set_current(_kernel.ready_q.cache);
 	return _current->switch_handle;
@@ -1731,3 +1734,55 @@ int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_SCHED_THREAD_USAGE
+
+static struct k_spinlock usage_lock;
+
+static uint32_t usage_now(void)
+{
+	uint32_t now = k_cycle_get_32();
+
+	/* Edge case: we use a zero as a null ("stop() already called") */
+	return (now == 0) ? 1 : now;
+}
+
+void z_sched_usage_start(struct k_thread *thread)
+{
+	/* One write through a volatile pointer doesn't require
+	 * synchronization as long as _usage() treats it as volatile
+	 * (we can't race with _stop() by design).
+	 */
+	_current_cpu->usage0 = usage_now();
+}
+
+void z_sched_usage_stop(void)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0;
+
+	if (u0 != 0) {
+		_current->base.usage += usage_now() - u0;
+	}
+
+	_current_cpu->usage0 = 0;
+	k_spin_unlock(&usage_lock, k);
+}
+
+uint64_t z_sched_thread_usage(struct k_thread *thread)
+{
+	k_spinlock_key_t k = k_spin_lock(&usage_lock);
+	uint32_t u0 = _current_cpu->usage0, now = usage_now();
+	uint64_t ret = thread->base.usage;
+
+	if (u0 != 0) {
+		ret += now - u0;
+		thread->base.usage = ret;
+		_current_cpu->usage0 = now;
+	}
+
+	k_spin_unlock(&usage_lock, k);
+	return ret;
+}
+
+#endif /* CONFIG_SCHED_THREAD_USAGE */
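Reviewer note: below is a rough, self-contained sketch (not part of the patch) of how kernel-internal code might consume the new counter when CONFIG_SCHED_THREAD_USAGE=y. It assumes a compilation unit that can see ksched.h (i.e. something under kernel/ or arch/); the print_thread_cycles() helper and its output format are hypothetical.

/* Illustrative sketch only -- not part of this patch.  Assumes a
 * kernel-internal source file (kernel/include on the include path)
 * built with CONFIG_SCHED_THREAD_USAGE=y; helper name and output
 * format are hypothetical.
 */
#include <kernel.h>
#include <sys/printk.h>
#include <ksched.h>

static void print_thread_cycles(struct k_thread *thread)
{
	/* Snapshot of the thread's accumulated cycle count; per the
	 * patch, any in-progress slice on the calling CPU is folded in
	 * and restarted before the total is returned.
	 */
	uint64_t cycles = z_sched_thread_usage(thread);
	const char *name = k_thread_name_get(thread);

	printk("%s: %llu cycles (~%llu us)\n",
	       (name != NULL) ? name : "<unnamed>",
	       (unsigned long long)cycles,
	       (unsigned long long)k_cyc_to_us_floor64(cycles));
}

One property worth noting: each accumulated delta is a 32-bit k_cycle_get_32() difference, so the scheme relies on stop/start (i.e. context switches or the earlier architecture hooks mentioned above) occurring more often than the hardware cycle counter wraps.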