arm64: improved arch_switch() implementation

Make it optimal without the need for an SVC/exception roundtrip on
every context switch. Performance numbers from tests/benchmarks/sched:

Before:
unpend   85 ready   58 switch  258 pend  231 tot  632 (avg  699)

After:
unpend   85 ready   59 switch  115 pend  138 tot  397 (avg  478)

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Authored by Nicolas Pitre on 2022-03-14 12:51:40 -05:00; committed by Anas Nashif
parent a5b33f89b7
commit 8affac64a7
10 changed files with 63 additions and 49 deletions
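
For reference, a condensed before/after of arch_switch() distilled from the
hunks below; it leans on Zephyr's kernel types and helpers (struct k_thread,
CONTAINER_OF(), z_arm64_call_svc(), z_arm64_context_switch()) and is a
side-by-side sketch rather than standalone code. Both functions are simply
named arch_switch() in the tree; they are renamed here only so they can sit
next to each other.

/* Old path (removed): every switch is funneled through an SVC, so it runs
 * as a synchronous exception. z_arm64_sync_exc() demuxes the call number,
 * digs the thread pointers out of the saved ESF, then reaches
 * z_arm64_context_switch() and returns through z_arm64_exit_exc().
 */
static inline void arch_switch_old(void *switch_to, void **switched_from)
{
	z_arm64_call_svc(switch_to, switched_from);
}

/* New path (added): call the context-switch routine directly, with no
 * exception entry/exit on the way.
 */
static inline void arch_switch_new(void *switch_to, void **switched_from)
{
	struct k_thread *new = switch_to;
	struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
					    switch_handle);

	z_arm64_context_switch(new, old);
}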


@@ -268,20 +268,19 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
/*
* Perform lazy FPU context switching by simply granting or denying
* access to FP regs based on FPU ownership before leaving the last
* exception level. If current thread doesn't own the FP regs then
* it will trap on its first access and then the actual FPU context
* switching will occur.
*
* This is called on every exception exit except for z_arm64_fpu_trap().
* exception level in case of exceptions, or during a thread context
* switch with the exception level of the new thread being 0.
* If current thread doesn't own the FP regs then it will trap on its
* first access and then the actual FPU context switching will occur.
*/
void z_arm64_fpu_exit_exc(void)
static void fpu_access_update(unsigned int exc_update_level)
{
__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
uint64_t cpacr = read_cpacr_el1();
if (arch_exception_depth() == 1) {
/* We're about to leave exception mode */
if (arch_exception_depth() == exc_update_level) {
/* We're about to execute non-exception code */
if (_current_cpu->arch.fpu_owner == _current) {
/* turn on FPU access */
write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
@@ -291,14 +290,34 @@ void z_arm64_fpu_exit_exc(void)
}
} else {
/*
* Shallower exception levels should always trap on FPU
* Any new exception level should always trap on FPU
* access as we want to make sure IRQs are disabled before
* granting them access.
* granting it access (see z_arm64_fpu_trap() documentation).
*/
write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
}
}
/*
* This is called on every exception exit except for z_arm64_fpu_trap().
* In that case the exception level of interest is 1 (soon to be 0).
*/
void z_arm64_fpu_exit_exc(void)
{
fpu_access_update(1);
}
/*
* This is called from z_arm64_context_switch(). FPU access may be granted
* only if exception level is 0. If we switch to a thread that is still in
* some exception context then FPU access would be re-evaluated at exception
* exit time via z_arm64_fpu_exit_exc().
*/
void z_arm64_fpu_thread_context_switch(void)
{
fpu_access_update(0);
}
int arch_float_disable(struct k_thread *thread)
{
if (thread != NULL) {
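
To make the gating rule in fpu_access_update() easy to exercise outside the
kernel, here is a small host-side model. struct cpu_model,
fpu_access_update_model() and the fpen_notrap flag are stand-ins of mine for
CPACR_EL1.FPEN and the per-CPU bookkeeping; only the decision logic mirrors
the function above.

#include <stdbool.h>
#include <stdio.h>

struct cpu_model {
	const void *fpu_owner;   /* thread whose FP registers are live */
	unsigned int exc_depth;  /* current exception nesting level */
	bool fpen_notrap;        /* models CPACR_EL1.FPEN == "no trap" */
};

/* Grant direct FP access only when the code about to run at the
 * non-exception level belongs to the thread whose registers are already
 * loaded; otherwise leave traps on so the first FP access faults and the
 * lazy save/restore happens in the trap handler.
 */
static void fpu_access_update_model(struct cpu_model *cpu,
				    const void *current_thread,
				    unsigned int exc_update_level)
{
	if (cpu->exc_depth == exc_update_level) {
		cpu->fpen_notrap = (cpu->fpu_owner == current_thread);
	} else {
		/* any newer exception level must start with traps on */
		cpu->fpen_notrap = false;
	}
}

int main(void)
{
	int thread_a, thread_b;
	struct cpu_model cpu = { .fpu_owner = &thread_a, .exc_depth = 1 };

	/* exception exit: level of interest is 1 (about to drop to 0) */
	fpu_access_update_model(&cpu, &thread_a, 1);
	printf("owner resumes after exception: traps %s\n",
	       cpu.fpen_notrap ? "off" : "on");

	/* direct context switch to a non-owner thread at depth 0 */
	cpu.exc_depth = 0;
	fpu_access_update_model(&cpu, &thread_b, 0);
	printf("non-owner resumes after switch: traps %s\n",
	       cpu.fpen_notrap ? "off" : "on");
	return 0;
}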


@@ -38,7 +38,7 @@ GEN_NAMED_OFFSET_SYM(_callee_saved_t, x23, x23_x24);
GEN_NAMED_OFFSET_SYM(_callee_saved_t, x25, x25_x26);
GEN_NAMED_OFFSET_SYM(_callee_saved_t, x27, x27_x28);
GEN_NAMED_OFFSET_SYM(_callee_saved_t, x29, x29_sp_el0);
GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx);
GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx_lr);
GEN_ABSOLUTE_SYM(___callee_saved_t_SIZEOF, sizeof(struct _callee_saved));


@@ -165,6 +165,9 @@ switch_el:
/* EL1 init */
bl z_arm64_el1_init
/* We want to use SP_ELx from now on */
msr SPSel, #1
/* Enable SError interrupts */
msr DAIFClr, #(DAIFCLR_ABT_BIT)
isb


@@ -24,7 +24,9 @@ _ASM_FILE_PROLOGUE
* Routine to handle context switches
*
* This function is directly called either by _isr_wrapper() in case of
* preemption, or z_arm64_sync_exc() in case of cooperative switching.
* preemption, or arch_switch() in case of cooperative switching.
*
* void z_arm64_context_switch(struct k_thread *new, struct k_thread *old);
*/
GTEXT(z_arm64_context_switch)
@@ -40,9 +42,9 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
stp x27, x28, [x1, #_thread_offset_to_callee_saved_x27_x28]
stp x29, x4, [x1, #_thread_offset_to_callee_saved_x29_sp_el0]
/* Save the current SP_ELx */
/* Save the current SP_ELx and return address */
mov x4, sp
str x4, [x1, #_thread_offset_to_callee_saved_sp_elx]
stp x4, lr, [x1, #_thread_offset_to_callee_saved_sp_elx_lr]
/* save current thread's exception depth */
mrs x4, tpidrro_el0
@@ -55,6 +57,17 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
orr x4, x4, x2, lsl #TPIDRROEL0_EXC_SHIFT
msr tpidrro_el0, x4
#ifdef CONFIG_FPU_SHARING
/*
* Do this after tpidrro_el0 is updated with the new exception
* depth value, and before old->switch_handle is updated (making
* it available for grab by another CPU) as we still use its stack.
*/
stp x0, x1, [sp, #-16]!
bl z_arm64_fpu_thread_context_switch
ldp x0, x1, [sp], #16
#endif
#ifdef CONFIG_SMP
/* save old thread into switch handle which is required by
* wait_for_switch
@@ -83,8 +96,8 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
/* Restore SP_EL0 */
msr sp_el0, x4
/* Restore SP_EL1 */
ldr x4, [x0, #_thread_offset_to_callee_saved_sp_elx]
/* Restore SP_EL1 and return address */
ldp x4, lr, [x0, #_thread_offset_to_callee_saved_sp_elx_lr]
mov sp, x4
#ifdef CONFIG_USERSPACE
@@ -99,7 +112,7 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
ldp xzr, x30, [sp], #16
#endif
/* Return to z_arm64_sync_exc() or _isr_wrapper() */
/* Return to arch_switch() or _isr_wrapper() */
ret
/*
@@ -131,9 +144,6 @@ SECTION_FUNC(TEXT, z_arm64_sync_exc)
/* Demux the SVC call */
and x1, x0, #0xff
cmp x1, #_SVC_CALL_CONTEXT_SWITCH
beq context_switch
cmp x1, #_SVC_CALL_RUNTIME_EXCEPT
beq oops
@@ -179,22 +189,6 @@ oops:
mov x0, sp
b z_arm64_do_kernel_oops
context_switch:
/*
* Retrieve x0 and x1 from the stack:
*
* - x0 = new_thread->switch_handle = switch_to thread
* - x1 = &old_thread->switch_handle = current thread
*/
ldp x0, x1, [sp, ___esf_t_x0_x1_OFFSET]
/* Get old thread from x1 */
sub x1, x1, ___thread_t_switch_handle_OFFSET
/* Switch thread */
bl z_arm64_context_switch
b z_arm64_exit_exc
inv:
mov x0, #0 /* K_ERR_CPU_EXCEPTION */
mov x1, sp
@@ -202,8 +196,3 @@ inv:
/* Return here only in case of recoverable error */
b z_arm64_exit_exc
GTEXT(z_arm64_call_svc)
SECTION_FUNC(TEXT, z_arm64_call_svc)
svc #_SVC_CALL_CONTEXT_SWITCH
ret
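
The ordering spelled out in the CONFIG_FPU_SHARING comment above (run the FPU
hook after tpidrro_el0 is updated but before old->switch_handle is published)
is essentially a release/acquire handoff: nothing that still uses the old
thread's stack may be reordered past the store that makes the thread grabbable
by another CPU. Below is a minimal host-side model of that handoff, assuming
nothing about Zephyr's actual wait_for_switch() beyond what the comments
state; thread_model, outgoing_cpu() and incoming_cpu() are invented names.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct thread_model {
	_Atomic(void *) switch_handle;  /* NULL while its old CPU still uses it */
};

static struct thread_model old_thread;

/* Outgoing CPU: everything that still touches the old thread's stack
 * (register save, the FPU hook, ...) must happen before this store.
 */
static void *outgoing_cpu(void *arg)
{
	(void)arg;
	/* ... save registers, call the FPU hook, etc. ... */
	atomic_store_explicit(&old_thread.switch_handle, (void *)&old_thread,
			      memory_order_release);
	return NULL;
}

/* Another CPU, modelling wait_for_switch(): it must not reuse the thread
 * until the handle has been published.
 */
static void *incoming_cpu(void *arg)
{
	(void)arg;
	while (atomic_load_explicit(&old_thread.switch_handle,
				    memory_order_acquire) == NULL) {
		/* spin */
	}
	puts("old thread is now safe to run elsewhere");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, incoming_cpu, NULL);
	pthread_create(&b, NULL, outgoing_cpu, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}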


@@ -70,6 +70,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
char *stack_ptr, k_thread_entry_t entry,
void *p1, void *p2, void *p3)
{
extern void z_arm64_exit_exc(void);
z_arch_esf_t *pInitCtx;
/*
@@ -118,6 +119,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
* z_arm64_userspace_enter() (see comments there)
*/
thread->callee_saved.sp_elx = (uint64_t)pInitCtx;
thread->callee_saved.lr = (uint64_t)z_arm64_exit_exc;
thread->switch_handle = thread;
}
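
Seeding callee_saved.lr with z_arm64_exit_exc is what lets a thread that has
never run be entered through the same "ret" that ends z_arm64_context_switch():
its synthetic exception frame is unwound exactly as if it were returning from
an exception. A toy host-side model, purely illustrative; callee_saved_model
and exit_exc_model are stand-ins of mine for struct _callee_saved and
z_arm64_exit_exc.

#include <stdint.h>
#include <stdio.h>

struct callee_saved_model {
	uint64_t sp_elx;   /* stack the restore path adopts */
	void (*lr)(void);  /* where the final "ret" lands */
};

static void exit_exc_model(void)
{
	puts("unwind the synthetic ESF and start the thread");
}

int main(void)
{
	uint64_t fake_esf[16];  /* stands in for the initial z_arch_esf_t */
	struct callee_saved_model cs = {
		.sp_elx = (uint64_t)(uintptr_t)fake_esf,
		.lr = exit_exc_model,
	};

	/* tail of the restore half of a context switch: adopt the saved
	 * stack, then "ret" through the saved lr
	 */
	cs.lr();
	return 0;
}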


@@ -31,9 +31,6 @@ static ALWAYS_INLINE bool arch_is_in_isr(void)
return arch_curr_cpu()->nested != 0U;
}
extern void z_arm64_call_svc(void *switch_to, void **switched_from);
#ifdef __cplusplus
}
#endif


@@ -34,9 +34,13 @@ static ALWAYS_INLINE void arch_kernel_init(void)
static inline void arch_switch(void *switch_to, void **switched_from)
{
z_arm64_call_svc(switch_to, switched_from);
extern void z_arm64_context_switch(struct k_thread *new,
struct k_thread *old);
struct k_thread *new = switch_to;
struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
switch_handle);
return;
z_arm64_context_switch(new, old);
}
extern void z_arm64_fatal_error(z_arch_esf_t *esf, unsigned int reason);
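
The CONTAINER_OF() step above is the one piece of the new arch_switch() that
is easy to get wrong: switched_from points at the switch_handle field inside
the outgoing thread, and the macro walks back to the enclosing struct. A
self-contained illustration with a simplified macro and struct (thread_model
is an invented type, and the real Zephyr macro may carry extra checks):

#include <stddef.h>
#include <stdio.h>

#define CONTAINER_OF(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct thread_model {
	int priority;
	void *switch_handle;
};

int main(void)
{
	struct thread_model old = { .priority = 7 };
	void **switched_from = &old.switch_handle;  /* what arch_switch() gets */

	/* recover the thread that owns this switch_handle field */
	struct thread_model *owner =
		CONTAINER_OF(switched_from, struct thread_model, switch_handle);

	printf("recovered thread priority: %d\n", owner->priority);
	return 0;
}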


@@ -24,7 +24,7 @@
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x27_x28_OFFSET)
#define _thread_offset_to_callee_saved_x29_sp_el0 \
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x29_sp_el0_OFFSET)
#define _thread_offset_to_callee_saved_sp_elx \
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_OFFSET)
#define _thread_offset_to_callee_saved_sp_elx_lr \
(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_lr_OFFSET)
#endif /* ZEPHYR_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ */


@@ -16,7 +16,6 @@
#ifndef ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
#define ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
#define _SVC_CALL_CONTEXT_SWITCH 0
#define _SVC_CALL_IRQ_OFFLOAD 1
#define _SVC_CALL_RUNTIME_EXCEPT 2
#define _SVC_CALL_SYSTEM_CALL 3


@@ -36,6 +36,7 @@ struct _callee_saved {
uint64_t x29;
uint64_t sp_el0;
uint64_t sp_elx;
uint64_t lr;
};
typedef struct _callee_saved _callee_saved_t;
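
The single stp/ldp pair at _thread_offset_to_callee_saved_sp_elx_lr in
switch.S only works because lr is laid out immediately after sp_elx. A
compile-time check over a trimmed copy of the struct tail (callee_saved_tail
is a name of mine, not the kernel's):

#include <stddef.h>
#include <stdint.h>

struct callee_saved_tail {
	uint64_t x29;
	uint64_t sp_el0;
	uint64_t sp_elx;
	uint64_t lr;
};

/* one 16-byte stp/ldp covers both fields only if they are adjacent */
_Static_assert(offsetof(struct callee_saved_tail, lr) ==
	       offsetof(struct callee_saved_tail, sp_elx) + sizeof(uint64_t),
	       "lr must immediately follow sp_elx");

int main(void)
{
	return 0;
}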