kernel/sched: Add "thread_usage" API for thread runtime cycle monitoring

This is an alternate backend that does what THREAD_RUNTIME_STATS
currently does, but with a few advantages:

* Correctly synchronized: you can't race against a running thread
  (potentially on another CPU!) while querying its usage.

* Realtime results: you always get the right answer, up to timer
  precision, even if a thread has been running uninterrupted for a
  while and hasn't updated its stored total.

* Portable: no per-architecture code is needed at all for the simple
  case. (It leverages the USE_SWITCH layer to do this, so it won't
  work on older architectures.)

* Faster/smaller: minimizes use of 64-bit math and keeps thread-struct
  overhead low (the scratch "started" time lives in the CPU struct
  instead).  The cost is one 64-bit counter per thread and a 32-bit
  scratch register in the CPU struct.

* Standalone: it's a core (but optional) scheduler feature, with no
  dependence on para-kernel configuration like the tracing
  infrastructure.

* More precise: architectures can optionally call a trivial
  zero-argument/no-result cdecl function out of interrupt entry to
  avoid charging ISR runtime to thread totals (a sketch follows this
  list).  No configuration is needed: if the hook is called you get
  proper ISR accounting, and if not you don't.
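
A minimal sketch of such an entry hook (the arch_isr_entry_hook() name
and the surrounding glue are hypothetical; only z_sched_usage_stop()
comes from this patch):

/* Called first thing out of the interrupt entry stub, with interrupts
 * still masked, so cycles spent in the ISR are not charged to the
 * interrupted thread.  z_sched_usage_stop() is idempotent, so the
 * kernel's own call on the context switch path does no double
 * accounting.
 */
void arch_isr_entry_hook(void)
{
#ifdef CONFIG_SCHED_THREAD_USAGE
        z_sched_usage_stop();
#endif
}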

For right now, pending unification, it's added side-by-side with the
older API and left as a z_*() internal symbol.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Authored by Andy Ross on 2021-09-27 08:22:43 -07:00; committed by Anas Nashif
parent 0c1835052a
commit 40d12c142d
6 changed files with 101 additions and 0 deletions

@@ -116,6 +116,10 @@ struct _thread_base {
        /* this thread's entry in a timeout queue */
        struct _timeout timeout;
#endif

#ifdef CONFIG_SCHED_THREAD_USAGE
        uint64_t usage;
#endif
};

typedef struct _thread_base _thread_base_t;
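
A sketch of the intended consumer of this field (a hypothetical
kernel-internal helper, not part of this patch; it assumes the
z_sched_thread_usage() accessor defined in sched.c below is visible):

#include <kernel.h>
#include <ksched.h>

/* Dump one thread's accumulated cycle count.  Going through
 * z_sched_thread_usage() rather than reading thread->base.usage
 * directly also rolls in cycles spent since the last context switch.
 */
static void dump_thread_usage(struct k_thread *thread)
{
        uint64_t cycles = z_sched_thread_usage(thread);

        printk("thread %p used %llu cycles\n", thread,
               (unsigned long long)cycles);
}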

@@ -130,6 +130,10 @@ struct _cpu {
        uint8_t swap_ok;
#endif

#ifdef CONFIG_SCHED_THREAD_USAGE
        uint32_t usage0;
#endif

        /* Per CPU architecture specifics */
        struct _cpu_arch arch;
};

@@ -377,6 +377,12 @@ config THREAD_MAX_NAME_LEN

config INSTRUMENT_THREAD_SWITCHING
        bool

config SCHED_THREAD_USAGE
        bool "Collect thread runtime usage"
        depends on USE_SWITCH
        help
          Alternate implementation of thread runtime cycle usage

menuconfig THREAD_RUNTIME_STATS
        bool "Thread runtime statistics"
        select INSTRUMENT_THREAD_SWITCHING
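
With the Kconfig entry above, enabling the new backend is a one-line
project configuration on a USE_SWITCH architecture (an illustrative
sketch, not part of this patch):

# prj.conf
CONFIG_SCHED_THREAD_USAGE=y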

@@ -363,4 +363,35 @@ static inline bool z_sched_wake_all(_wait_q_t *wait_q, int swap_retval,
int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
                 _wait_q_t *wait_q, k_timeout_t timeout, void **data);

/** @brief Halt thread cycle usage accounting.
 *
 * Halts the accumulation of thread cycle usage and adds the current
 * total to the thread's counter. Called on context switch.
 *
 * Note that this function is idempotent. The core kernel code calls
 * it at the end of interrupt handlers (because that is where we have
 * a portable hook) where we are context switching, which will include
 * any cycles spent in the ISR in the per-thread accounting. But
 * architecture code can also call it earlier out of interrupt entry
 * to improve measurement fidelity.
 *
 * This function assumes local interrupts are masked (so that the
 * current CPU pointer and current thread are safe to modify), but
 * requires no other synchronization. Architecture layers don't need
 * to do anything more.
 */
void z_sched_usage_stop(void);

void z_sched_usage_start(struct k_thread *thread);

static inline void z_sched_usage_switch(struct k_thread *thread)
{
        ARG_UNUSED(thread);
#ifdef CONFIG_SCHED_THREAD_USAGE
        z_sched_usage_stop();
        z_sched_usage_start(thread);
#endif
}

#endif /* ZEPHYR_KERNEL_INCLUDE_KSCHED_H_ */
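
A hypothetical smoke test for the behavior documented above (not part
of this patch; assumes CONFIG_SCHED_THREAD_USAGE=y and a kernel-build
context where these declarations are visible).  Because a query rolls
the live measurement window into the total, the count advances even
though the thread never context switches:

#include <kernel.h>
#include <ksched.h>

void usage_smoke_test(void)
{
        uint64_t before = z_sched_thread_usage(k_current_get());

        k_busy_wait(1000);      /* burn roughly 1 ms of CPU in this thread */

        uint64_t after = z_sched_thread_usage(k_current_get());

        printk("accumulated %llu cycles with no context switch\n",
               (unsigned long long)(after - before));
}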

@@ -109,6 +109,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
#ifdef CONFIG_TIMESLICING
        z_reset_time_slice();
#endif

        z_sched_usage_switch(new_thread);

#ifdef CONFIG_SMP
        _current_cpu->swap_ok = 0;

@@ -937,6 +937,8 @@ void *z_get_next_switch_handle(void *interrupted)
                }

                new_thread = next_up();

                z_sched_usage_switch(new_thread);

                if (old_thread != new_thread) {
                        update_metairq_preempt(new_thread);
                        wait_for_switch(new_thread);
@@ -976,6 +978,7 @@ void *z_get_next_switch_handle(void *interrupted)
        }

        return ret;
#else
        z_sched_usage_switch(_kernel.ready_q.cache);
        _current->switch_handle = interrupted;
        set_current(_kernel.ready_q.cache);
        return _current->switch_handle;
@@ -1731,3 +1734,55 @@ int z_sched_wait(struct k_spinlock *lock, k_spinlock_key_t key,
        }

        return ret;
}

#ifdef CONFIG_SCHED_THREAD_USAGE

static struct k_spinlock usage_lock;

static uint32_t usage_now(void)
{
        uint32_t now = k_cycle_get_32();

        /* Edge case: we use a zero as a null ("stop() already called") */
        return (now == 0) ? 1 : now;
}

void z_sched_usage_start(struct k_thread *thread)
{
        /* One write through a volatile pointer doesn't require
         * synchronization as long as _usage() treats it as volatile
         * (we can't race with _stop() by design).
         */
        _current_cpu->usage0 = usage_now();
}

void z_sched_usage_stop(void)
{
        k_spinlock_key_t k = k_spin_lock(&usage_lock);
        uint32_t u0 = _current_cpu->usage0;

        if (u0 != 0) {
                _current->base.usage += usage_now() - u0;
        }

        _current_cpu->usage0 = 0;
        k_spin_unlock(&usage_lock, k);
}

uint64_t z_sched_thread_usage(struct k_thread *thread)
{
        k_spinlock_key_t k = k_spin_lock(&usage_lock);
        uint32_t u0 = _current_cpu->usage0, now = usage_now();
        uint64_t ret = thread->base.usage;

        if (u0 != 0) {
                ret += now - u0;
                thread->base.usage = ret;
                _current_cpu->usage0 = now;
        }

        k_spin_unlock(&usage_lock, k);
        return ret;
}

#endif /* CONFIG_SCHED_THREAD_USAGE */
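
A standalone illustration (not kernel code) of the wraparound-safe
arithmetic relied on above: since the delta is computed in uint32_t,
(now - u0) is taken modulo 2^32 and stays correct even if
k_cycle_get_32() wraps inside the measurement window; the only value
needing special treatment is 0, which usage_now() reserves as the
"stopped" marker.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t u0    = 0xFFFFFF00u;   /* window started near the top */
        uint32_t now   = 0x00000100u;   /* counter wrapped past zero   */
        uint32_t delta = now - u0;      /* modulo-2^32 subtraction     */

        printf("elapsed cycles: %u\n", (unsigned int)delta);   /* prints 512 */
        return 0;
}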