kernel/arch: enhance the "ready thread" cache

The way the ready thread cache was implemented meant it was not always
"hot": a miss occurred whenever the cached thread was taken out of the
ready queue. When that happened, the cache was not refilled
immediately, since the replacement might never get to run: the flow
could be interrupted and another thread could take its place first.
This was the more conservative approach, ensuring that loading a
thread into the cache was never wasted work.

However, this caused two problems:

1. The cache could not be refilled until another thread context-switched
in, since there was no thread in the cache to compare priorities
against.

2. Interrupt exit code always had to call into C to find out what
thread to run next whenever the current thread was not cooperative and
did not have the scheduler locked. Furthermore, this code path could
encounter a cold cache, in which case it had to find the next thread
to run the long way (see the sketch below).
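
For reference, the old lookup path, condensed from the scheduler code
removed at the end of this diff; a cold cache forced the fallback scan
of the ready queue:

    /* old code, removed below: the cache could be NULL ("cold") */
    struct k_thread *_get_next_ready_thread(void)
    {
            struct k_thread *cache = _ready_q.cache;

            /* cold cache: scan the priority bitmap the long way */
            return cache ? cache : __get_next_ready_thread();
    }

    int _is_next_thread_current(void)
    {
            /* called through C from every interrupt exit path */
            return _get_next_ready_thread() == _current;
    }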

To fix this, filling the cache is now more aggressive: the next thread
to put in the cache is found immediately, even when the currently
cached thread is taken out of the ready queue (e.g. context-switched
out). This makes the interrupt exit code much faster on the slow path.
In addition, since finding the next thread to run is now always "get
it from the cache", a simple fetch from memory (_kernel.ready_q.cache),
there is no longer any need to call into the more complex C code.
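
Condensed from the new scheduler code at the end of this diff: the
cache is refilled on the spot whenever the cached thread leaves the
ready queue, so the lookup collapses to a single load:

    /* new code: the ready queue cache is never NULL */
    static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
    {
            return _ready_q.cache;    /* i.e. _kernel.ready_q.cache */
    }

    void _remove_thread_from_ready_q(struct k_thread *thread)
    {
            /* ... dequeue <thread> ... */
            struct k_thread **cache = &_ready_q.cache;

            /* refill immediately: _get_ready_q_head() peeks at the
             * head of the highest-priority non-empty queue
             */
            *cache = *cache == thread ? _get_ready_q_head() : *cache;
    }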

On the ARM FRDM-K64F board, the following improvements are seen:

Before:

1- Measure time to switch from ISR back to interrupted task

   switching time is 215 tcs = 1791 nsec

2- Measure time from ISR to executing a different task (rescheduled)

   switch time is 315 tcs = 2625 nsec

After:

1- Measure time to switch from ISR back to interrupted task

   switching time is 130 tcs = 1083 nsec

2- Measure time from ISR to executing a different task (rescheduled)

   switch time is 225 tcs = 1875 nsec

These two cases show the most dramatic gains, but most of the numbers
generated by the latency_measure test improve.
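
(The tcs-to-nanosecond conversion works out to roughly 8.33 ns per
timer cycle, consistent with the K64F's 120 MHz core clock: the
ISR-return path drops by 85 cycles (~708 ns) and the reschedule path
by 90 cycles (~750 ns).)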

Fixes ZEP-1401.

Change-Id: I2eaac147048b1ec71a93bd0a285e743a39533973
Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
commit 88b3691415 (parent e6ebe3a8b4)
Benjamin Walsh, 2016-12-02 10:37:27 -05:00
17 changed files with 85 additions and 180 deletions


@ -40,7 +40,6 @@ GTEXT(_firq_stack_resume)
#if CONFIG_RGF_NUM_BANKS != 1
GDATA(_firq_stack)
GDATA(_saved_firq_stack)
GTEXT(_is_next_thread_current)
SECTION_VAR(NOINIT, _firq_stack)
.space CONFIG_FIRQ_STACK_SIZE
@ -157,26 +156,10 @@ _firq_check_for_swap:
ld_s r0, [r2, _thread_offset_to_sched_locked]
brgt r0, 0, _firq_no_reschedule
/* check if the current thread needs to be rescheduled */
push_s r2
push_s r1
push_s blink
jl _is_next_thread_current
pop_s blink
pop_s r1
pop_s r2
#if CONFIG_RGF_NUM_BANKS != 1
#ifndef CONFIG_FIRQ_NO_LPCC
/*
* restore lp_count, lp_start, lp_end from r23-r25 in case
* _is_next_thread_current() routine used them
*/
mov lp_count,r23
sr r24, [_ARC_V2_LP_START]
sr r25, [_ARC_V2_LP_END]
#endif
#endif
breq r0, 0, _firq_reschedule
/* Check if the current thread (in r2) is the cached thread */
ld_s r0, [r1, _kernel_offset_to_ready_q_cache]
brne r0, r2, _firq_reschedule
/* fall to no rescheduling */
.balign 4
@ -248,18 +231,7 @@ _firq_reschedule:
st _CAUSE_FIRQ, [r2, _thread_offset_to_relinquish_cause]
/*
* Save needed registers to callee saved ones. It is faster than
* pushing them to stack. It is possible to do since program has
* just saved them and the calling routine will save them in turn
* if it uses them.
*/
mov_s r13, blink
mov_s r14, r1
jl _get_next_ready_thread
mov_s blink, r13
mov_s r1, r14
mov_s r2, r0
ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
st_s r2, [r1, _kernel_offset_to_current]
#ifdef CONFIG_ARC_STACK_CHECKING


@ -176,34 +176,15 @@ _trap_check_for_swap:
brgt r0, 0, _trap_return
/* check if the current thread needs to be rescheduled */
push_s r2
push_s r1
push_s blink
jl _is_next_thread_current
pop_s blink
pop_s r1
pop_s r2
brne r0, 0, _trap_return
ld_s r0, [r1, _kernel_offset_to_ready_q_cache]
breq r0, r2, _trap_return
_save_callee_saved_regs
st _CAUSE_RIRQ, [r2, _thread_offset_to_relinquish_cause]
/* note: Ok to use _CAUSE_RIRQ since everything is saved */
/*
* Save needed registers to callee saved ones. It is faster than
* pushing registers to stack. It is possible to do since program has
* just saved them and the calling routine will save them in turn
* if it uses them.
*/
mov_s r13, blink
mov_s r14, r0
mov_s r15, r1
jl _get_next_ready_thread
mov_s r2, r0
mov_s r1, r15
mov_s r0, r14
mov_s blink, r13
ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
st_s r2, [r1, _kernel_offset_to_current]
/* clear AE bit to forget this was an exception */


@ -35,7 +35,6 @@
GTEXT(_rirq_enter)
GTEXT(_rirq_exit)
GTEXT(_rirq_common_interrupt_swap)
GTEXT(_is_next_thread_current)
#if 0 /* TODO: when FIRQ is not present, all would be regular */
#define NUM_REGULAR_IRQ_PRIO_LEVELS CONFIG_NUM_IRQ_PRIO_LEVELS
@ -156,27 +155,13 @@ _rirq_reschedule_check:
#endif
/* check if the current thread needs to be rescheduled */
push_s r2
push_s r1
push_s blink
jl _is_next_thread_current
pop_s blink
pop_s r1
pop_s r2
brne.d r0, 0, _rirq_no_reschedule
ld_s r0, [r1, _kernel_offset_to_ready_q_cache]
breq.d r0, r2, _rirq_no_reschedule
/* delay slot: always load the current thread's stack */
ld sp, [r2, _thread_offset_to_sp]
/*
* Get the next scheduled thread. On _get_next_ready_thread
* return it is stored in r0.
*/
push_s r2
push_s r1
push_s blink
jl _get_next_ready_thread
pop_s blink
pop_s r1
pop_s r2
/* cached thread to run is in r0, fall through */
.balign 4
_rirq_reschedule:


@ -34,7 +34,6 @@
#include <swap_macros.h>
GTEXT(_Swap)
GTEXT(_get_next_ready_thread)
GDATA(_k_neg_eagain)
GDATA(_kernel)
@ -106,22 +105,8 @@ SECTION_FUNC(TEXT, _Swap)
_save_callee_saved_regs
/* find out incoming thread (fiber or task) */
/*
* Save needed registers to callee saved ones. It is faster than
* pushing them to stack. It is possible to do since program has
* just saved them and the calling routine will save them in turn
* if it uses them.
*/
mov_s r13, blink
mov_s r14, r0
mov_s r15, r1
jl _get_next_ready_thread
mov_s r2, r0
mov_s r1, r15
mov_s r0, r14
mov_s blink, r13
/* get the cached thread to run */
ld_s r2, [r1, _kernel_offset_to_ready_q_cache]
/* entering here, r2 contains the new current thread */
#ifdef CONFIG_ARC_STACK_CHECKING


@ -36,7 +36,6 @@ _ASM_FILE_PROLOGUE
GTEXT(_ExcExit)
GTEXT(_IntExit)
GDATA(_kernel)
GTEXT(_is_next_thread_current)
#if CONFIG_GDB_INFO
#define _EXIT_EXC_IF_FIBER_PREEMPTED beq _ExcExitWithGdbStub
@ -88,9 +87,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, _IntExit)
SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, _ExcExit)
ldr r1, =_kernel
ldr r0, =_kernel
ldr r1, [r1, #_kernel_offset_to_current]
ldr r1, [r0, #_kernel_offset_to_current]
ldr r2, [r1, #_thread_offset_to_prio]
ldr r3, [r1, #_thread_offset_to_sched_locked]
@ -102,20 +101,9 @@ SECTION_SUBSEC_FUNC(TEXT, _HandlerModeExit, _ExcExit)
cmp r3, #0
bgt _EXIT_EXC
push {lr}
/* _is_next_thread_current must be called with interrupts locked */
cpsid i
blx _is_next_thread_current
cpsie i
#if defined(CONFIG_CPU_CORTEX_M0_M0PLUS)
pop {r1}
mov lr, r1
#else
pop {lr}
#endif /* CONFIG_CPU_CORTEX_M0_M0PLUS */
cmp r0, #0
bne _EXIT_EXC
ldr r0, [r0, _kernel_offset_to_ready_q_cache]
cmp r0, r1
beq _EXIT_EXC
/* context switch required, pend the PendSV exception */
ldr r1, =_SCS_ICSR


@ -36,7 +36,6 @@ GTEXT(_Swap)
GTEXT(__svc)
#endif
GTEXT(__pendsv)
GTEXT(_get_next_ready_thread)
GDATA(_k_neg_eagain)
GDATA(_kernel)
@ -114,16 +113,11 @@ SECTION_FUNC(TEXT, __pendsv)
msr BASEPRI, r0
#endif
/* find out incoming thread (fiber or task) */
/* _kernel is still in r1 */
mov.n v2, lr
movs.n v1, r1
blx _get_next_ready_thread
movs.n r1, v1
mov.n lr, v2
movs.n r2, r0
/* fetch the thread to run from the ready queue cache */
ldr r2, [r1, _kernel_offset_to_ready_q_cache]
/* r2 contains the new thread */
str r2, [r1, #_kernel_offset_to_current]
/*


@ -59,6 +59,8 @@ _arch_switch_to_main_thread(char *main_stack, size_t main_stack_size,
_current = (void *)main_stack;
/* the ready queue cache already contains the main thread */
__asm__ __volatile__(
/* move to main() thread stack */


@ -25,7 +25,6 @@ GTEXT(_exception)
/* import */
GTEXT(_Fault)
GTEXT(_Swap)
GTEXT(_is_next_thread_current)
#ifdef CONFIG_IRQ_OFFLOAD
GTEXT(_irq_do_offload)
GTEXT(_offload_routine)
@ -140,8 +139,8 @@ on_irq_stack:
bne r12, zero, no_reschedule
/* Call into the kernel to see if a scheduling decision is necessary */
call _is_next_thread_current
bne r2, zero, no_reschedule
ldw r2, _kernel_offset_to_ready_q_cache(r10)
beq r2, r11, no_reschedule
/*
* A context reschedule is required: keep the volatile registers of


@ -25,7 +25,6 @@ GTEXT(_thread_entry_wrapper)
/* imports */
GTEXT(_sys_k_event_logger_context_switch)
GTEXT(_get_next_ready_thread)
GTEXT(_k_neg_eagain)
/* unsigned int _Swap(unsigned int key)
@ -78,16 +77,17 @@ SECTION_FUNC(exception.other, _Swap)
ori r10, r10, %lo(_kernel)
#endif /* CONFIG_KERNEL_EVENT_LOGGER_CONTEXT_SWITCH */
/* Assign to _kernel.current the return value of
* _get_next_ready_thread()
*/
call _get_next_ready_thread
movhi r10, %hi(_kernel)
ori r10, r10, %lo(_kernel)
stw r2, _kernel_offset_to_current(r10)
/* get cached thread to run */
ldw r2, _kernel_offset_to_ready_q_cache(r10)
/* At this point r2 points to the next thread to be swapped in */
/* the thread to be swapped in is now the current thread */
stw r2, _kernel_offset_to_current(r10)
/* Restore callee-saved registers and switch to the incoming
* thread's stack
*/


@ -42,7 +42,6 @@
/* externs */
GTEXT(_Swap)
GTEXT(_is_next_thread_current)
#ifdef CONFIG_SYS_POWER_MANAGEMENT
GTEXT(_sys_power_save_idle_exit)
@ -303,9 +302,8 @@ alreadyOnIntStack:
/* reschedule only if the scheduler says that we must do so */
call _is_next_thread_current
testl %eax, %eax
jnz noReschedule
cmpl %edx, _kernel_offset_to_ready_q_cache(%ecx)
je noReschedule
/*
* Set the INT_ACTIVE bit in the k_thread to allow the upcoming call to
@ -315,11 +313,6 @@ alreadyOnIntStack:
*/
#if defined(CONFIG_FP_SHARING) || defined(CONFIG_GDB_INFO)
/*
* Reload _kernel.current as _is_next_thread_current()
* might have clobbered it.
*/
movl _kernel + _kernel_offset_to_current, %edx
orl $INT_ACTIVE, _thread_offset_to_flags(%edx)
#endif


@ -38,7 +38,6 @@
GTEXT(_Swap)
/* externs */
GTEXT(_get_next_ready_thread)
GDATA(_k_neg_eagain)
/**
@ -131,7 +130,7 @@ SECTION_FUNC(TEXT, _Swap)
/* Register the context switch */
call _sys_k_event_logger_context_switch
#endif
call _get_next_ready_thread
movl _kernel_offset_to_ready_q_cache(%edi), %eax
/*
* At this point, the %eax register contains the 'k_thread *' of the


@ -37,6 +37,9 @@ GEN_OFFSET_SYM(_kernel_t, irq_stack);
GEN_OFFSET_SYM(_kernel_t, idle);
#endif
GEN_OFFSET_SYM(_kernel_t, ready_q);
GEN_OFFSET_SYM(_ready_q_t, cache);
#ifdef CONFIG_FP_SHARING
GEN_OFFSET_SYM(_kernel_t, current_fp);
#endif


@ -146,6 +146,8 @@ struct _ready_q {
sys_dlist_t q[K_NUM_PRIORITIES];
};
typedef struct _ready_q _ready_q_t;
struct _kernel {
/* nested interrupt count */


@ -30,11 +30,17 @@ extern void _pend_thread(struct k_thread *thread,
_wait_q_t *wait_q, int32_t timeout);
extern void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout);
extern void _move_thread_to_end_of_prio_q(struct k_thread *thread);
extern struct k_thread *_get_next_ready_thread(void);
extern int __must_switch_threads(void);
extern int32_t _ms_to_ticks(int32_t ms);
extern void idle(void *, void *, void *);
/* find which one is the next thread to run */
/* must be called with interrupts locked */
static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
{
return _ready_q.cache;
}
static inline int _is_idle_thread(void *entry_point)
{
return entry_point == idle;


@ -39,6 +39,9 @@
#define _kernel_offset_to_current_fp \
(___kernel_t_current_fp_OFFSET)
#define _kernel_offset_to_ready_q_cache \
(___kernel_t_ready_q_OFFSET + ___ready_q_t_cache_OFFSET)
/* end - kernel */
/* threads */


@ -265,6 +265,17 @@ static void prepare_multithreading(struct k_thread *dummy_thread)
sys_dlist_init(&_ready_q.q[ii]);
}
/*
* prime the cache with the main thread since:
*
* - the cache can never be NULL
* - the main thread will be the one to run first
* - no other thread is initialized yet and thus their priority fields
* contain garbage, which would prevent the cache loading algorithm
* to work as intended
*/
_ready_q.cache = _main_thread;
_new_thread(_main_stack, MAIN_STACK_SIZE,
_main, NULL, NULL, NULL,
CONFIG_MAIN_THREAD_PRIORITY, K_ESSENTIAL);


@ -41,6 +41,26 @@ static void _clear_ready_q_prio_bit(int prio)
*bmap &= ~_get_ready_q_prio_bit(prio);
}
/*
* Find the next thread to run when there is no thread in the cache and update
* the cache.
*/
static struct k_thread *_get_ready_q_head(void)
{
int prio = _get_highest_ready_prio();
int q_index = _get_ready_q_q_index(prio);
sys_dlist_t *list = &_ready_q.q[q_index];
__ASSERT(!sys_dlist_is_empty(list),
"no thread to run (prio: %d, queue index: %u)!\n",
prio, q_index);
struct k_thread *thread =
(struct k_thread *)sys_dlist_peek_head_not_empty(list);
return thread;
}
/*
* Add thread to the ready queue, in the slot for its priority; the thread
* must not be on a wait queue.
@ -61,9 +81,7 @@ void _add_thread_to_ready_q(struct k_thread *thread)
struct k_thread **cache = &_ready_q.cache;
*cache = *cache && _is_prio_higher(thread->base.prio,
(*cache)->base.prio) ?
thread : *cache;
*cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache;
}
/*
@ -85,7 +103,7 @@ void _remove_thread_from_ready_q(struct k_thread *thread)
struct k_thread **cache = &_ready_q.cache;
*cache = *cache == thread ? NULL : *cache;
*cache = *cache == thread ? _get_ready_q_head() : *cache;
}
/* reschedule threads if the scheduler is not locked */
@ -182,37 +200,6 @@ void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout)
_pend_thread(_current, wait_q, timeout);
}
/*
* Find the next thread to run when there is no thread in the cache and update
* the cache.
*/
static struct k_thread *__get_next_ready_thread(void)
{
int prio = _get_highest_ready_prio();
int q_index = _get_ready_q_q_index(prio);
sys_dlist_t *list = &_ready_q.q[q_index];
__ASSERT(!sys_dlist_is_empty(list),
"no thread to run (prio: %d, queue index: %u)!\n",
prio, q_index);
struct k_thread *thread =
(struct k_thread *)sys_dlist_peek_head_not_empty(list);
_ready_q.cache = thread;
return thread;
}
/* find which one is the next thread to run */
/* must be called with interrupts locked */
struct k_thread *_get_next_ready_thread(void)
{
struct k_thread *cache = _ready_q.cache;
return cache ? cache : __get_next_ready_thread();
}
/*
* Check if there is a thread of higher prio than the current one. Should only
* be called if we already know that the current thread is preemptible.
@ -228,11 +215,6 @@ int __must_switch_threads(void)
return _is_prio_higher(_get_highest_ready_prio(), _current->base.prio);
}
int _is_next_thread_current(void)
{
return _get_next_ready_thread() == _current;
}
int k_thread_priority_get(k_tid_t thread)
{
return thread->base.prio;
@ -275,7 +257,7 @@ void _move_thread_to_end_of_prio_q(struct k_thread *thread)
struct k_thread **cache = &_ready_q.cache;
*cache = *cache == thread ? NULL : *cache;
*cache = *cache == thread ? _get_ready_q_head() : *cache;
}
void k_yield(void)