arm64: improved arch_switch() implementation
Make it optimal without the need for an SVC/exception roundtrip on every
context switch.

Performance numbers from tests/benchmarks/sched:

Before:
unpend 85 ready 58 switch 258 pend 231 tot 632 (avg 699)

After:
unpend 85 ready 59 switch 115 pend 138 tot 397 (avg 478)

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
commit 8affac64a7
parent a5b33f89b7
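In short, the hunks below remove the SVC path entirely: arch_switch() no longer issues svc #_SVC_CALL_CONTEXT_SWITCH to reach z_arm64_sync_exc(), it calls z_arm64_context_switch() directly, and the return address travels through the new lr slot in _callee_saved. Here is a minimal sketch of the resulting inline, assembled from the arch_switch() hunk further down; it assumes the usual Zephyr kernel headers for struct k_thread and CONTAINER_OF(). The CONTAINER_OF() step works because switched_from points at old_thread->switch_handle.

    /* Cooperative switch without an exception roundtrip: a plain call into
     * z_arm64_context_switch() replaces the former SVC-based z_arm64_call_svc().
     */
    static inline void arch_switch(void *switch_to, void **switched_from)
    {
            extern void z_arm64_context_switch(struct k_thread *new,
                                               struct k_thread *old);
            struct k_thread *new = switch_to;
            /* switched_from is &old_thread->switch_handle */
            struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
                                                switch_handle);

            z_arm64_context_switch(new, old);
    }

The assembly side pairs with this: z_arm64_context_switch() now saves lr next to sp_elx (stp x4, lr) and restores both on the other end, so a plain ret lands back in arch_switch() or _isr_wrapper().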
@@ -268,20 +268,19 @@ void z_arm64_fpu_trap(z_arch_esf_t *esf)
 /*
  * Perform lazy FPU context switching by simply granting or denying
  * access to FP regs based on FPU ownership before leaving the last
- * exception level. If current thread doesn't own the FP regs then
- * it will trap on its first access and then the actual FPU context
- * switching will occur.
- *
- * This is called on every exception exit except for z_arm64_fpu_trap().
+ * exception level in case of exceptions, or during a thread context
+ * switch with the exception level of the new thread being 0.
+ * If current thread doesn't own the FP regs then it will trap on its
+ * first access and then the actual FPU context switching will occur.
  */
-void z_arm64_fpu_exit_exc(void)
+static void fpu_access_update(unsigned int exc_update_level)
 {
 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
 
 	uint64_t cpacr = read_cpacr_el1();
 
-	if (arch_exception_depth() == 1) {
-		/* We're about to leave exception mode */
+	if (arch_exception_depth() == exc_update_level) {
+		/* We're about to execute non-exception code */
 		if (_current_cpu->arch.fpu_owner == _current) {
 			/* turn on FPU access */
 			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
@@ -291,14 +290,34 @@ void z_arm64_fpu_exit_exc(void)
 		}
 	} else {
 		/*
-		 * Shallower exception levels should always trap on FPU
+		 * Any new exception level should always trap on FPU
 		 * access as we want to make sure IRQs are disabled before
-		 * granting them access.
+		 * granting it access (see z_arm64_fpu_trap() documentation).
 		 */
 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
 	}
 }
 
+/*
+ * This is called on every exception exit except for z_arm64_fpu_trap().
+ * In that case the exception level of interest is 1 (soon to be 0).
+ */
+void z_arm64_fpu_exit_exc(void)
+{
+	fpu_access_update(1);
+}
+
+/*
+ * This is called from z_arm64_context_switch(). FPU access may be granted
+ * only if exception level is 0. If we switch to a thread that is still in
+ * some exception context then FPU access would be re-evaluated at exception
+ * exit time via z_arm64_fpu_exit_exc().
+ */
+void z_arm64_fpu_thread_context_switch(void)
+{
+	fpu_access_update(0);
+}
+
 int arch_float_disable(struct k_thread *thread)
 {
 	if (thread != NULL) {
@@ -38,7 +38,7 @@ GEN_NAMED_OFFSET_SYM(_callee_saved_t, x23, x23_x24);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x25, x25_x26);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x27, x27_x28);
 GEN_NAMED_OFFSET_SYM(_callee_saved_t, x29, x29_sp_el0);
-GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx);
+GEN_NAMED_OFFSET_SYM(_callee_saved_t, sp_elx, sp_elx_lr);
 
 GEN_ABSOLUTE_SYM(___callee_saved_t_SIZEOF, sizeof(struct _callee_saved));
 
@@ -165,6 +165,9 @@ switch_el:
 	/* EL1 init */
 	bl	z_arm64_el1_init
 
+	/* We want to use SP_ELx from now on */
+	msr	SPSel, #1
+
 	/* Enable SError interrupts */
 	msr	DAIFClr, #(DAIFCLR_ABT_BIT)
 	isb
@@ -24,7 +24,9 @@ _ASM_FILE_PROLOGUE
  * Routine to handle context switches
  *
  * This function is directly called either by _isr_wrapper() in case of
- * preemption, or z_arm64_sync_exc() in case of cooperative switching.
+ * preemption, or arch_switch() in case of cooperative switching.
+ *
+ * void z_arm64_context_switch(struct k_thread *new, struct k_thread *old);
  */
 
 GTEXT(z_arm64_context_switch)
@@ -40,9 +42,9 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	stp	x27, x28, [x1, #_thread_offset_to_callee_saved_x27_x28]
 	stp	x29, x4, [x1, #_thread_offset_to_callee_saved_x29_sp_el0]
 
-	/* Save the current SP_ELx */
+	/* Save the current SP_ELx and return address */
 	mov	x4, sp
-	str	x4, [x1, #_thread_offset_to_callee_saved_sp_elx]
+	stp	x4, lr, [x1, #_thread_offset_to_callee_saved_sp_elx_lr]
 
 	/* save current thread's exception depth */
 	mrs	x4, tpidrro_el0
@@ -55,6 +57,17 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	orr	x4, x4, x2, lsl #TPIDRROEL0_EXC_SHIFT
 	msr	tpidrro_el0, x4
 
+#ifdef CONFIG_FPU_SHARING
+	/*
+	 * Do this after tpidrro_el0 is updated with the new exception
+	 * depth value, and before old->switch_handle is updated (making
+	 * it available for grab by another CPU) as we still use its stack.
+	 */
+	stp	x0, x1, [sp, #-16]!
+	bl	z_arm64_fpu_thread_context_switch
+	ldp	x0, x1, [sp], #16
+#endif
+
 #ifdef CONFIG_SMP
 	/* save old thread into switch handle which is required by
 	 * wait_for_switch
@@ -83,8 +96,8 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	/* Restore SP_EL0 */
 	msr	sp_el0, x4
 
-	/* Restore SP_EL1 */
-	ldr	x4, [x0, #_thread_offset_to_callee_saved_sp_elx]
+	/* Restore SP_EL1 and return address */
+	ldp	x4, lr, [x0, #_thread_offset_to_callee_saved_sp_elx_lr]
 	mov	sp, x4
 
 #ifdef CONFIG_USERSPACE
@@ -99,7 +112,7 @@ SECTION_FUNC(TEXT, z_arm64_context_switch)
 	ldp	xzr, x30, [sp], #16
 #endif
 
-	/* Return to z_arm64_sync_exc() or _isr_wrapper() */
+	/* Return to arch_switch() or _isr_wrapper() */
 	ret
 
 /*
@@ -131,9 +144,6 @@ SECTION_FUNC(TEXT, z_arm64_sync_exc)
 	/* Demux the SVC call */
 	and	x1, x0, #0xff
 
-	cmp	x1, #_SVC_CALL_CONTEXT_SWITCH
-	beq	context_switch
-
 	cmp	x1, #_SVC_CALL_RUNTIME_EXCEPT
 	beq	oops
 
@@ -179,22 +189,6 @@ oops:
 	mov	x0, sp
 	b	z_arm64_do_kernel_oops
 
-context_switch:
-	/*
-	 * Retrieve x0 and x1 from the stack:
-	 *
-	 *  - x0 = new_thread->switch_handle = switch_to thread
-	 *  - x1 = &old_thread->switch_handle = current thread
-	 */
-	ldp	x0, x1, [sp, ___esf_t_x0_x1_OFFSET]
-
-	/* Get old thread from x1 */
-	sub	x1, x1, ___thread_t_switch_handle_OFFSET
-
-	/* Switch thread */
-	bl	z_arm64_context_switch
-	b	z_arm64_exit_exc
-
 inv:
 	mov	x0, #0 /* K_ERR_CPU_EXCEPTION */
 	mov	x1, sp
@@ -202,8 +196,3 @@ inv:
 
 	/* Return here only in case of recoverable error */
 	b	z_arm64_exit_exc
-
-GTEXT(z_arm64_call_svc)
-SECTION_FUNC(TEXT, z_arm64_call_svc)
-	svc	#_SVC_CALL_CONTEXT_SWITCH
-	ret
@@ -70,6 +70,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		     char *stack_ptr, k_thread_entry_t entry,
 		     void *p1, void *p2, void *p3)
 {
+	extern void z_arm64_exit_exc(void);
 	z_arch_esf_t *pInitCtx;
 
 	/*
@@ -118,6 +119,7 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 	 * z_arm64_userspace_enter() (see comments there)
 	 */
 	thread->callee_saved.sp_elx = (uint64_t)pInitCtx;
+	thread->callee_saved.lr = (uint64_t)z_arm64_exit_exc;
 
 	thread->switch_handle = thread;
 }
@@ -31,9 +31,6 @@ static ALWAYS_INLINE bool arch_is_in_isr(void)
 	return arch_curr_cpu()->nested != 0U;
 }
 
-
-extern void z_arm64_call_svc(void *switch_to, void **switched_from);
-
 #ifdef __cplusplus
 }
 #endif
@@ -34,9 +34,13 @@ static ALWAYS_INLINE void arch_kernel_init(void)
 
 static inline void arch_switch(void *switch_to, void **switched_from)
 {
-	z_arm64_call_svc(switch_to, switched_from);
+	extern void z_arm64_context_switch(struct k_thread *new,
+					   struct k_thread *old);
+	struct k_thread *new = switch_to;
+	struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
+					    switch_handle);
 
-	return;
+	z_arm64_context_switch(new, old);
 }
 
 extern void z_arm64_fatal_error(z_arch_esf_t *esf, unsigned int reason);
@@ -24,7 +24,7 @@
 	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x27_x28_OFFSET)
 #define _thread_offset_to_callee_saved_x29_sp_el0 \
 	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_x29_sp_el0_OFFSET)
-#define _thread_offset_to_callee_saved_sp_elx \
-	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_OFFSET)
+#define _thread_offset_to_callee_saved_sp_elx_lr \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_sp_elx_lr_OFFSET)
 
 #endif /* ZEPHYR_ARCH_ARM64_INCLUDE_OFFSETS_SHORT_ARCH_H_ */
@@ -16,7 +16,6 @@
 #ifndef ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
 #define ZEPHYR_INCLUDE_ARCH_ARM64_SYSCALL_H_
 
-#define _SVC_CALL_CONTEXT_SWITCH	0
 #define _SVC_CALL_IRQ_OFFLOAD		1
 #define _SVC_CALL_RUNTIME_EXCEPT	2
 #define _SVC_CALL_SYSTEM_CALL		3
@@ -36,6 +36,7 @@ struct _callee_saved {
 	uint64_t x29;
 	uint64_t sp_el0;
 	uint64_t sp_elx;
+	uint64_t lr;
 };
 
 typedef struct _callee_saved _callee_saved_t;