kernel: priority queues: declare as static inlines
After the move to C files we saw a drop in performance when
running latency_measure. This patch declares the priority queue
functions as static inlines, with minor optimizations.
The result for one metric (on qemu):
On 3.6, before anything was changed:
Get data from LIFO (w/ ctx switch): 13087 ns
After the original change (46484da502):
Get data from LIFO (w/ ctx switch): 13663 ns
with this change:
Get data from LIFO (w/ ctx switch): 12543 ns
So overall, this is a net gain of ~500 ns compared to 3.6, which can be seen across many
of the metrics.
Signed-off-by: Anas Nashif <anas.nashif@intel.com>
This commit is contained in:
parent
0b8714bcde
commit
4593f0d71c
|
@ -7,13 +7,20 @@
|
||||||
#ifndef ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_
|
#ifndef ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_
|
||||||
#define ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_
|
#define ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_
|
||||||
|
|
||||||
|
#include <zephyr/sys/math_extras.h>
|
||||||
|
#include <zephyr/sys/dlist.h>
|
||||||
|
|
||||||
/* Dump Scheduling */
|
extern int32_t z_sched_prio_cmp(struct k_thread *thread_1,
|
||||||
|
struct k_thread *thread_2);
|
||||||
|
|
||||||
|
bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b);
|
||||||
|
|
||||||
|
/* Dumb Scheduling */
|
||||||
#if defined(CONFIG_SCHED_DUMB)
|
#if defined(CONFIG_SCHED_DUMB)
|
||||||
#define _priq_run_add z_priq_dumb_add
|
#define _priq_run_add z_priq_dumb_add
|
||||||
#define _priq_run_remove z_priq_dumb_remove
|
#define _priq_run_remove z_priq_dumb_remove
|
||||||
# if defined(CONFIG_SCHED_CPU_MASK)
|
# if defined(CONFIG_SCHED_CPU_MASK)
|
||||||
# define _priq_run_best _priq_dumb_mask_best
|
# define _priq_run_best z_priq_dumb_mask_best
|
||||||
# else
|
# else
|
||||||
# define _priq_run_best z_priq_dumb_best
|
# define _priq_run_best z_priq_dumb_best
|
||||||
# endif /* CONFIG_SCHED_CPU_MASK */
|
# endif /* CONFIG_SCHED_CPU_MASK */
|
||||||
|
@ -25,11 +32,11 @@
|
||||||
/* Multi Queue Scheduling */
|
/* Multi Queue Scheduling */
|
||||||
#elif defined(CONFIG_SCHED_MULTIQ)
|
#elif defined(CONFIG_SCHED_MULTIQ)
|
||||||
|
|
||||||
# if defined(CONFIG_64BIT)
|
#if defined(CONFIG_64BIT)
|
||||||
# define NBITS 64
|
#define NBITS 64
|
||||||
# else
|
#else
|
||||||
# define NBITS 32
|
#define NBITS 32
|
||||||
# endif
|
#endif /* CONFIG_64BIT */
|
||||||
|
|
||||||
#define _priq_run_add z_priq_mq_add
|
#define _priq_run_add z_priq_mq_add
|
||||||
#define _priq_run_remove z_priq_mq_remove
|
#define _priq_run_remove z_priq_mq_remove
|
||||||
|
@ -40,30 +47,99 @@ static ALWAYS_INLINE void z_priq_mq_remove(struct _priq_mq *pq, struct k_thread
|
||||||
|
|
||||||
/* Scalable Wait Queue */
|
/* Scalable Wait Queue */
|
||||||
#if defined(CONFIG_WAITQ_SCALABLE)
|
#if defined(CONFIG_WAITQ_SCALABLE)
|
||||||
#define z_priq_wait_add z_priq_rb_add
|
#define _priq_wait_add z_priq_rb_add
|
||||||
#define _priq_wait_remove z_priq_rb_remove
|
#define _priq_wait_remove z_priq_rb_remove
|
||||||
#define _priq_wait_best z_priq_rb_best
|
#define _priq_wait_best z_priq_rb_best
|
||||||
/* Dump Wait Queue */
|
/* Dumb Wait Queue */
|
||||||
#elif defined(CONFIG_WAITQ_DUMB)
|
#elif defined(CONFIG_WAITQ_DUMB)
|
||||||
#define z_priq_wait_add z_priq_dumb_add
|
#define _priq_wait_add z_priq_dumb_add
|
||||||
#define _priq_wait_remove z_priq_dumb_remove
|
#define _priq_wait_remove z_priq_dumb_remove
|
||||||
#define _priq_wait_best z_priq_dumb_best
|
#define _priq_wait_best z_priq_dumb_best
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Dumb Scheduling*/
|
static ALWAYS_INLINE void z_priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread)
|
||||||
struct k_thread *z_priq_dumb_best(sys_dlist_t *pq);
|
{
|
||||||
void z_priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread);
|
ARG_UNUSED(pq);
|
||||||
|
|
||||||
/* Scalable Scheduling */
|
sys_dlist_remove(&thread->base.qnode_dlist);
|
||||||
void z_priq_rb_add(struct _priq_rb *pq, struct k_thread *thread);
|
}
|
||||||
void z_priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread);
|
|
||||||
|
|
||||||
/* Multi Queue Scheduling */
|
static ALWAYS_INLINE struct k_thread *z_priq_dumb_best(sys_dlist_t *pq)
|
||||||
struct k_thread *z_priq_mq_best(struct _priq_mq *pq);
|
{
|
||||||
struct k_thread *z_priq_rb_best(struct _priq_rb *pq);
|
struct k_thread *thread = NULL;
|
||||||
|
sys_dnode_t *n = sys_dlist_peek_head(pq);
|
||||||
|
|
||||||
|
if (n != NULL) {
|
||||||
|
thread = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
|
||||||
|
}
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
|
||||||
bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b);
|
static ALWAYS_INLINE void z_priq_rb_add(struct _priq_rb *pq, struct k_thread *thread)
|
||||||
|
{
|
||||||
|
struct k_thread *t;
|
||||||
|
|
||||||
|
thread->base.order_key = pq->next_order_key++;
|
||||||
|
|
||||||
|
/* Renumber at wraparound. This is tiny code, and in practice
|
||||||
|
* will almost never be hit on real systems. BUT on very
|
||||||
|
* long-running systems where a priq never completely empties
|
||||||
|
* AND that contains very large numbers of threads, it can be
|
||||||
|
* a latency glitch to loop over all the threads like this.
|
||||||
|
*/
|
||||||
|
if (!pq->next_order_key) {
|
||||||
|
RB_FOR_EACH_CONTAINER(&pq->tree, t, base.qnode_rb) {
|
||||||
|
t->base.order_key = pq->next_order_key++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_insert(&pq->tree, &thread->base.qnode_rb);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ALWAYS_INLINE void z_priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread)
|
||||||
|
{
|
||||||
|
rb_remove(&pq->tree, &thread->base.qnode_rb);
|
||||||
|
|
||||||
|
if (!pq->tree.root) {
|
||||||
|
pq->next_order_key = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static ALWAYS_INLINE struct k_thread *z_priq_rb_best(struct _priq_rb *pq)
|
||||||
|
{
|
||||||
|
struct k_thread *thread = NULL;
|
||||||
|
struct rbnode *n = rb_get_min(&pq->tree);
|
||||||
|
|
||||||
|
if (n != NULL) {
|
||||||
|
thread = CONTAINER_OF(n, struct k_thread, base.qnode_rb);
|
||||||
|
}
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ALWAYS_INLINE struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
|
||||||
|
{
|
||||||
|
struct k_thread *thread = NULL;
|
||||||
|
|
||||||
|
for (int i = 0; i < PRIQ_BITMAP_SIZE; ++i) {
|
||||||
|
if (!pq->bitmask[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_64BIT
|
||||||
|
sys_dlist_t *l = &pq->queues[i * 64 + u64_count_trailing_zeros(pq->bitmask[i])];
|
||||||
|
#else
|
||||||
|
sys_dlist_t *l = &pq->queues[i * 32 + u32_count_trailing_zeros(pq->bitmask[i])];
|
||||||
|
#endif
|
||||||
|
sys_dnode_t *n = sys_dlist_peek_head(l);
|
||||||
|
|
||||||
|
if (n != NULL) {
|
||||||
|
thread = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_MULTIQ
|
#ifdef CONFIG_SCHED_MULTIQ
|
||||||
|
@ -105,4 +181,43 @@ static ALWAYS_INLINE void z_priq_mq_remove(struct _priq_mq *pq,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_SCHED_MULTIQ */
|
#endif /* CONFIG_SCHED_MULTIQ */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CONFIG_SCHED_CPU_MASK
|
||||||
|
static ALWAYS_INLINE struct k_thread *z_priq_dumb_mask_best(sys_dlist_t *pq)
|
||||||
|
{
|
||||||
|
/* With masks enabled we need to be prepared to walk the list
|
||||||
|
* looking for one we can run
|
||||||
|
*/
|
||||||
|
struct k_thread *thread;
|
||||||
|
|
||||||
|
SYS_DLIST_FOR_EACH_CONTAINER(pq, thread, base.qnode_dlist) {
|
||||||
|
if ((thread->base.cpu_mask & BIT(_current_cpu->id)) != 0) {
|
||||||
|
return thread;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_SCHED_CPU_MASK */
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(CONFIG_SCHED_DUMB) || defined(CONFIG_WAITQ_DUMB)
|
||||||
|
static ALWAYS_INLINE void z_priq_dumb_add(sys_dlist_t *pq,
|
||||||
|
struct k_thread *thread)
|
||||||
|
{
|
||||||
|
struct k_thread *t;
|
||||||
|
|
||||||
|
SYS_DLIST_FOR_EACH_CONTAINER(pq, t, base.qnode_dlist) {
|
||||||
|
if (z_sched_prio_cmp(thread, t) > 0) {
|
||||||
|
sys_dlist_insert(&t->base.qnode_dlist,
|
||||||
|
&thread->base.qnode_dlist);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sys_dlist_append(pq, &thread->base.qnode_dlist);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_SCHED_DUMB || CONFIG_WAITQ_DUMB */
|
||||||
|
|
||||||
#endif /* ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_ */
|
#endif /* ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_ */
|
||||||
|
|
|
@ -7,26 +7,7 @@
|
||||||
#include <zephyr/kernel.h>
|
#include <zephyr/kernel.h>
|
||||||
#include <ksched.h>
|
#include <ksched.h>
|
||||||
#include <zephyr/sys/math_extras.h>
|
#include <zephyr/sys/math_extras.h>
|
||||||
|
#include <zephyr/sys/dlist.h>
|
||||||
void z_priq_dumb_remove(sys_dlist_t *pq, struct k_thread *thread)
|
|
||||||
{
|
|
||||||
ARG_UNUSED(pq);
|
|
||||||
|
|
||||||
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
|
||||||
|
|
||||||
sys_dlist_remove(&thread->base.qnode_dlist);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct k_thread *z_priq_dumb_best(sys_dlist_t *pq)
|
|
||||||
{
|
|
||||||
struct k_thread *thread = NULL;
|
|
||||||
sys_dnode_t *n = sys_dlist_peek_head(pq);
|
|
||||||
|
|
||||||
if (n != NULL) {
|
|
||||||
thread = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
|
|
||||||
}
|
|
||||||
return thread;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b)
|
bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b)
|
||||||
{
|
{
|
||||||
|
@ -47,73 +28,3 @@ bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b)
|
||||||
? 1 : 0;
|
? 1 : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void z_priq_rb_add(struct _priq_rb *pq, struct k_thread *thread)
|
|
||||||
{
|
|
||||||
struct k_thread *t;
|
|
||||||
|
|
||||||
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
|
||||||
|
|
||||||
thread->base.order_key = pq->next_order_key++;
|
|
||||||
|
|
||||||
/* Renumber at wraparound. This is tiny code, and in practice
|
|
||||||
* will almost never be hit on real systems. BUT on very
|
|
||||||
* long-running systems where a priq never completely empties
|
|
||||||
* AND that contains very large numbers of threads, it can be
|
|
||||||
* a latency glitch to loop over all the threads like this.
|
|
||||||
*/
|
|
||||||
if (!pq->next_order_key) {
|
|
||||||
RB_FOR_EACH_CONTAINER(&pq->tree, t, base.qnode_rb) {
|
|
||||||
t->base.order_key = pq->next_order_key++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rb_insert(&pq->tree, &thread->base.qnode_rb);
|
|
||||||
}
|
|
||||||
|
|
||||||
void z_priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread)
|
|
||||||
{
|
|
||||||
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
|
||||||
|
|
||||||
rb_remove(&pq->tree, &thread->base.qnode_rb);
|
|
||||||
|
|
||||||
if (!pq->tree.root) {
|
|
||||||
pq->next_order_key = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct k_thread *z_priq_rb_best(struct _priq_rb *pq)
|
|
||||||
{
|
|
||||||
struct k_thread *thread = NULL;
|
|
||||||
struct rbnode *n = rb_get_min(&pq->tree);
|
|
||||||
|
|
||||||
if (n != NULL) {
|
|
||||||
thread = CONTAINER_OF(n, struct k_thread, base.qnode_rb);
|
|
||||||
}
|
|
||||||
return thread;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
|
|
||||||
{
|
|
||||||
struct k_thread *thread = NULL;
|
|
||||||
|
|
||||||
for (int i = 0; i < PRIQ_BITMAP_SIZE; ++i) {
|
|
||||||
if (!pq->bitmask[i]) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_64BIT
|
|
||||||
sys_dlist_t *l = &pq->queues[i * 64 + u64_count_trailing_zeros(pq->bitmask[i])];
|
|
||||||
#else
|
|
||||||
sys_dlist_t *l = &pq->queues[i * 32 + u32_count_trailing_zeros(pq->bitmask[i])];
|
|
||||||
#endif
|
|
||||||
sys_dnode_t *n = sys_dlist_peek_head(l);
|
|
||||||
|
|
||||||
if (n != NULL) {
|
|
||||||
thread = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return thread;
|
|
||||||
}
|
|
||||||
|
|
|
@ -82,43 +82,6 @@ int32_t z_sched_prio_cmp(struct k_thread *thread_1,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SCHED_CPU_MASK
|
|
||||||
static ALWAYS_INLINE struct k_thread *_priq_dumb_mask_best(sys_dlist_t *pq)
|
|
||||||
{
|
|
||||||
/* With masks enabled we need to be prepared to walk the list
|
|
||||||
* looking for one we can run
|
|
||||||
*/
|
|
||||||
struct k_thread *thread;
|
|
||||||
|
|
||||||
SYS_DLIST_FOR_EACH_CONTAINER(pq, thread, base.qnode_dlist) {
|
|
||||||
if ((thread->base.cpu_mask & BIT(_current_cpu->id)) != 0) {
|
|
||||||
return thread;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_SCHED_CPU_MASK */
|
|
||||||
|
|
||||||
#if defined(CONFIG_SCHED_DUMB) || defined(CONFIG_WAITQ_DUMB)
|
|
||||||
static ALWAYS_INLINE void z_priq_dumb_add(sys_dlist_t *pq,
|
|
||||||
struct k_thread *thread)
|
|
||||||
{
|
|
||||||
struct k_thread *t;
|
|
||||||
|
|
||||||
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
|
||||||
|
|
||||||
SYS_DLIST_FOR_EACH_CONTAINER(pq, t, base.qnode_dlist) {
|
|
||||||
if (z_sched_prio_cmp(thread, t) > 0) {
|
|
||||||
sys_dlist_insert(&t->base.qnode_dlist,
|
|
||||||
&thread->base.qnode_dlist);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sys_dlist_append(pq, &thread->base.qnode_dlist);
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_SCHED_DUMB || CONFIG_WAITQ_DUMB */
|
|
||||||
|
|
||||||
static ALWAYS_INLINE void *thread_runq(struct k_thread *thread)
|
static ALWAYS_INLINE void *thread_runq(struct k_thread *thread)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
|
#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
|
||||||
|
@ -150,11 +113,15 @@ static ALWAYS_INLINE void *curr_cpu_runq(void)
|
||||||
|
|
||||||
static ALWAYS_INLINE void runq_add(struct k_thread *thread)
|
static ALWAYS_INLINE void runq_add(struct k_thread *thread)
|
||||||
{
|
{
|
||||||
|
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
||||||
|
|
||||||
_priq_run_add(thread_runq(thread), thread);
|
_priq_run_add(thread_runq(thread), thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ALWAYS_INLINE void runq_remove(struct k_thread *thread)
|
static ALWAYS_INLINE void runq_remove(struct k_thread *thread)
|
||||||
{
|
{
|
||||||
|
__ASSERT_NO_MSG(!z_is_idle_thread_object(thread));
|
||||||
|
|
||||||
_priq_run_remove(thread_runq(thread), thread);
|
_priq_run_remove(thread_runq(thread), thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -616,7 +583,7 @@ static void add_to_waitq_locked(struct k_thread *thread, _wait_q_t *wait_q)
|
||||||
|
|
||||||
if (wait_q != NULL) {
|
if (wait_q != NULL) {
|
||||||
thread->base.pended_on = wait_q;
|
thread->base.pended_on = wait_q;
|
||||||
z_priq_wait_add(&wait_q->waitq, thread);
|
_priq_wait_add(&wait_q->waitq, thread);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue