arm64: FPU context switching support
This adds FPU sharing support with a lazy context switching algorithm.

Every thread is allowed to use FPU/SIMD registers. In fact, the compiler
may insert FPU register accesses in any context to optimize even non-FP
code unless the -mgeneral-regs-only compiler flag is used, and Zephyr
currently doesn't support such a build.

It is therefore possible with this patch to perform FP accesses in ISRs
as well, although IRQs are then disabled in such cases to prevent
nested IRQs.

Because the thread object grows in size, some tests have to be adjusted.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
commit f1f63dda17 (parent a82fff04ff)
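
For orientation before the diff itself: the lazy-switching scheme added here keeps FPU access disabled by default and moves register contents only when a non-owner actually touches the FPU. Below is a condensed, hypothetical sketch of that flow; every identifier is an illustrative stand-in for the real code in arch/arm64/core/fpu.c further down.

struct thread;

struct per_cpu {
	struct thread *fpu_owner;  /* thread whose FP regs are live on this CPU */
};

void enable_fpu_access(void);              /* stand-in: set CPACR_EL1.FPEN */
void save_fp_context(struct thread *t);    /* stand-in: z_arm64_fpu_save() */
void restore_fp_context(struct thread *t); /* stand-in: z_arm64_fpu_restore() */

/* The first FP instruction of a non-owner traps; only then do registers move. */
void fpu_access_trap(struct per_cpu *cpu, struct thread *current)
{
	enable_fpu_access();
	if (cpu->fpu_owner != NULL) {
		save_fp_context(cpu->fpu_owner);   /* spill the previous owner */
	}
	cpu->fpu_owner = current;                  /* take ownership */
	restore_fp_context(current);               /* reload our saved context */
}
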
@@ -20,6 +20,7 @@ zephyr_library_sources(
 	vector_table.S
 )
 
+zephyr_library_sources_ifdef(CONFIG_FPU_SHARING fpu.c fpu.S)
 zephyr_library_sources_ifdef(CONFIG_ARM_MMU mmu.c mmu.S)
 zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace.S)
 zephyr_library_sources_ifdef(CONFIG_GEN_SW_ISR_TABLE isr_wrapper.S)
@@ -8,6 +8,9 @@ config CPU_CORTEX_A
 	select CPU_CORTEX
 	select HAS_FLASH_LOAD_OFFSET
 	select SCHED_IPI_SUPPORTED if SMP
+	select CPU_HAS_FPU
+	imply FPU
+	imply FPU_SHARING
 	help
 	  This option signifies the use of a CPU of the Cortex-A family.
 
arch/arm64/core/fpu.S (new file)
@@ -0,0 +1,65 @@
/*
 * Copyright (c) 2021 BayLibre SAS
 * Written by: Nicolas Pitre
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <toolchain.h>
#include <linker/sections.h>

_ASM_FILE_PROLOGUE

GTEXT(z_arm64_fpu_save)
SECTION_FUNC(TEXT, z_arm64_fpu_save)

	stp	q0, q1, [x0, #(16 * 0)]
	stp	q2, q3, [x0, #(16 * 2)]
	stp	q4, q5, [x0, #(16 * 4)]
	stp	q6, q7, [x0, #(16 * 6)]
	stp	q8, q9, [x0, #(16 * 8)]
	stp	q10, q11, [x0, #(16 * 10)]
	stp	q12, q13, [x0, #(16 * 12)]
	stp	q14, q15, [x0, #(16 * 14)]
	stp	q16, q17, [x0, #(16 * 16)]
	stp	q18, q19, [x0, #(16 * 18)]
	stp	q20, q21, [x0, #(16 * 20)]
	stp	q22, q23, [x0, #(16 * 22)]
	stp	q24, q25, [x0, #(16 * 24)]
	stp	q26, q27, [x0, #(16 * 26)]
	stp	q28, q29, [x0, #(16 * 28)]
	stp	q30, q31, [x0, #(16 * 30)]

	mrs	x1, fpsr
	mrs	x2, fpcr
	str	w1, [x0, #(16 * 32 + 0)]
	str	w2, [x0, #(16 * 32 + 4)]

	ret

GTEXT(z_arm64_fpu_restore)
SECTION_FUNC(TEXT, z_arm64_fpu_restore)

	ldp	q0, q1, [x0, #(16 * 0)]
	ldp	q2, q3, [x0, #(16 * 2)]
	ldp	q4, q5, [x0, #(16 * 4)]
	ldp	q6, q7, [x0, #(16 * 6)]
	ldp	q8, q9, [x0, #(16 * 8)]
	ldp	q10, q11, [x0, #(16 * 10)]
	ldp	q12, q13, [x0, #(16 * 12)]
	ldp	q14, q15, [x0, #(16 * 14)]
	ldp	q16, q17, [x0, #(16 * 16)]
	ldp	q18, q19, [x0, #(16 * 18)]
	ldp	q20, q21, [x0, #(16 * 20)]
	ldp	q22, q23, [x0, #(16 * 22)]
	ldp	q24, q25, [x0, #(16 * 24)]
	ldp	q26, q27, [x0, #(16 * 26)]
	ldp	q28, q29, [x0, #(16 * 28)]
	ldp	q30, q31, [x0, #(16 * 30)]

	ldr	w1, [x0, #(16 * 32 + 0)]
	ldr	w2, [x0, #(16 * 32 + 4)]
	msr	fpsr, x1
	msr	fpcr, x2

	ret
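
The offsets hard-coded above assume a save area of 32 16-byte Q registers followed by fpsr and fpcr, which matches struct z_arm64_fp_context introduced later in this diff. A minimal compile-time check of that assumed layout (illustrative, not part of the commit; struct fp_ctx is a stand-in):

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

struct fp_ctx {
	__int128 q[32];      /* q0..q31, 16 bytes each: 512 bytes total */
	uint32_t fpsr, fpcr; /* stored by fpu.S at byte offsets 512 and 516 */
};

static_assert(sizeof(((struct fp_ctx *)0)->q) == 16 * 32, "Q bank is 512 bytes");
static_assert(offsetof(struct fp_ctx, fpsr) == 16 * 32 + 0, "fpsr offset");
static_assert(offsetof(struct fp_ctx, fpcr) == 16 * 32 + 4, "fpcr offset");
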
arch/arm64/core/fpu.c (new file)
@@ -0,0 +1,260 @@
/*
 * Copyright (c) 2021 BayLibre SAS
 * Written by: Nicolas Pitre
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <kernel.h>
#include <kernel_structs.h>
#include <kernel_arch_interface.h>
#include <arch/cpu.h>

/* to be found in fpu.S */
extern void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
extern void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context);

#define FPU_DEBUG 0

#if FPU_DEBUG

/*
 * Debug traces have to be produced without printk() or any other function
 * relying on a va_list, as va_start() always copies the FPU registers that
 * could be used to pass float arguments, and that triggers an FPU access
 * trap.
 */

#include <string.h>

static void DBG(char *msg, struct k_thread *th)
{
	char buf[80], *p;
	unsigned int v;

	strcpy(buf, "CPU# exc# ");
	buf[3] = '0' + _current_cpu->id;
	buf[8] = '0' + arch_exception_depth();
	strcat(buf, _current->name);
	strcat(buf, ": ");
	strcat(buf, msg);
	strcat(buf, " ");
	strcat(buf, th->name);

	v = *(unsigned char *)&th->arch.saved_fp_context;
	p = buf + strlen(buf);
	*p++ = ' ';
	*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
	*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
	*p++ = '\n';
	*p = 0;

	k_str_out(buf, p - buf);
}

#else

static inline void DBG(char *msg, struct k_thread *t) { }

#endif /* FPU_DEBUG */

/*
 * Flush FPU content and disable access.
 * This is called locally and also from flush_fpu_ipi_handler().
 */
void z_arm64_flush_local_fpu(void)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	struct k_thread *owner = _current_cpu->arch.fpu_owner;

	if (owner != NULL) {
		uint64_t cpacr = read_cpacr_el1();

		/* turn on FPU access */
		write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
		isb();

		/* save current owner's content */
		z_arm64_fpu_save(&owner->arch.saved_fp_context);
		/* make sure content made it to memory before releasing */
		dsb();
		/* release ownership */
		_current_cpu->arch.fpu_owner = NULL;
		DBG("disable", owner);

		/* disable FPU access */
		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
	}
}
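
Background for the CPACR_EL1 manipulation above: the architecturally defined FPEN field sits in bits [21:20], and only the value 0b11 leaves EL0/EL1 FP/SIMD accesses untrapped; clearing the field makes the very next FP instruction trap. A restated sketch of the bit manipulation (the macro mirrors the one Zephyr defines elsewhere, repeated here purely for illustration):

#include <stdint.h>

#define CPACR_EL1_FPEN_SHIFT	20U
#define CPACR_EL1_FPEN_NOTRAP	(0x3UL << CPACR_EL1_FPEN_SHIFT) /* 0b11: no trap */

/* True when FP/SIMD instructions would execute without trapping. */
static inline int fpu_access_granted(uint64_t cpacr)
{
	return (cpacr & CPACR_EL1_FPEN_NOTRAP) == CPACR_EL1_FPEN_NOTRAP;
}
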

#ifdef CONFIG_SMP
static void flush_owned_fpu(struct k_thread *thread)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	int i;

	/* search all CPUs for the owner we want */
	for (i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
		if (_kernel.cpus[i].arch.fpu_owner != thread) {
			continue;
		}
		/* we found it live on CPU i */
		if (i == _current_cpu->id) {
			z_arm64_flush_local_fpu();
		} else {
			/* the FPU context is live on another CPU */
			z_arm64_flush_fpu_ipi(i);

			/*
			 * Wait for it only if this is about the thread
			 * currently running on this CPU. Otherwise the
			 * other CPU running some other thread could regain
			 * ownership the moment it is removed from it and
			 * we would be stuck here.
			 *
			 * Also, if this is for the thread running on this
			 * CPU, then we preemptively flush any live context
			 * on this CPU as well since we're likely to
			 * replace it, and this avoids a deadlock where
			 * two CPUs want to pull each other's FPU context.
			 */
			if (thread == _current) {
				z_arm64_flush_local_fpu();
				while (_kernel.cpus[i].arch.fpu_owner == thread) {
					dsb();
				}
			}
		}
		break;
	}
}
#endif

void z_arm64_fpu_enter_exc(void)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	/* always deny FPU access whenever an exception is entered */
	write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP);
	isb();
}

/*
 * Process the FPU trap.
 *
 * This usually means that FP regs belong to another thread. Save them
 * to that thread's save area and restore the current thread's content.
 *
 * We also get here when FP regs are used while in exception, as FP access
 * is always disabled by default in that case. If so, we save the FPU content
 * to the owning thread and simply enable FPU access. Exceptions should be
 * short and have no persistent register context when they're done, so
 * there is nothing to save/restore for that context... as long as we
 * don't get interrupted, that is. To ensure that, we mask interrupts in
 * the triggering exception context.
 */
void z_arm64_fpu_trap(z_arch_esf_t *esf)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	/* turn on FPU access */
	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
	isb();

	/* save current owner's content if any */
	struct k_thread *owner = _current_cpu->arch.fpu_owner;

	if (owner) {
		z_arm64_fpu_save(&owner->arch.saved_fp_context);
		dsb();
		_current_cpu->arch.fpu_owner = NULL;
		DBG("save", owner);
	}

	if (arch_exception_depth() > 1) {
		/*
		 * We were already in exception when the FPU access trapped.
		 * We give it access and prevent any further IRQ recursion
		 * by disabling IRQs, as we wouldn't be able to preserve the
		 * interrupted exception's FPU context.
		 */
		esf->spsr |= DAIF_IRQ_BIT;
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Make sure the FPU context we need isn't live on another CPU.
	 * The current CPU's FPU context is NULL at this point.
	 */
	flush_owned_fpu(_current);
#endif

	/* become new owner */
	_current_cpu->arch.fpu_owner = _current;

	/* restore our content */
	z_arm64_fpu_restore(&_current->arch.saved_fp_context);
	DBG("restore", _current);
}

/*
 * Perform lazy FPU context switching by simply granting or denying
 * access to FP regs based on FPU ownership before leaving the last
 * exception level. If the current thread doesn't own the FP regs then
 * it will trap on its first access and then the actual FPU context
 * switching will occur.
 *
 * This is called on every exception exit except for z_arm64_fpu_trap().
 */
void z_arm64_fpu_exit_exc(void)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	uint64_t cpacr = read_cpacr_el1();

	if (arch_exception_depth() == 1) {
		/* We're about to leave exception mode */
		if (_current_cpu->arch.fpu_owner == _current) {
			/* turn on FPU access */
			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
		} else {
			/* deny FPU access */
			write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
		}
	} else {
		/*
		 * Shallower exception levels should always trap on FPU
		 * access as we want to make sure IRQs are disabled before
		 * granting them access.
		 */
		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
	}
}

int arch_float_disable(struct k_thread *thread)
{
	if (thread != NULL) {
		unsigned int key = arch_irq_lock();

#ifdef CONFIG_SMP
		flush_owned_fpu(thread);
#else
		if (thread == _current_cpu->arch.fpu_owner) {
			z_arm64_flush_local_fpu();
		}
#endif

		arch_irq_unlock(key);
	}

	return 0;
}

int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	/* FP access is currently always enabled automatically */
	return 0;
}
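
For context, arch_float_disable() above is what backs the public k_float_disable() API; a minimal usage sketch from application code (assuming CONFIG_FPU_SHARING=y):

#include <zephyr.h>

/* A thread that is done with floating point can drop its FPU context;
 * on ARM64 this flushes any live ownership so the kernel performs no
 * further save/restore on the thread's behalf.
 */
static void fp_work_done(void)
{
	(void)k_float_disable(k_current_get());
}
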

@@ -149,7 +149,7 @@ void z_arm64_el1_init(void)
 	isb();
 
 	reg = 0U;			/* RES0 */
-	reg |= CPACR_EL1_FPEN_NOTRAP;	/* Do not trap NEON/SIMD/FP */
+	reg |= CPACR_EL1_FPEN_NOTRAP;	/* Do not trap NEON/SIMD/FP initially */
 	/* TODO: CONFIG_FLOAT_*_FORBIDDEN */
 	write_cpacr_el1(reg);
 
@@ -27,6 +27,7 @@
 
 #define SGI_SCHED_IPI	0
 #define SGI_PTABLE_IPI	1
+#define SGI_FPU_IPI	2
 
 struct boot_params {
 	uint64_t mpid;
@@ -129,6 +130,9 @@ void z_arm64_secondary_start(void)
 #ifdef CONFIG_USERSPACE
 	irq_enable(SGI_PTABLE_IPI);
 #endif
+#ifdef CONFIG_FPU_SHARING
+	irq_enable(SGI_FPU_IPI);
+#endif
 #endif
 
 	fn = arm64_cpu_boot_params.fn;
@@ -191,6 +195,24 @@ void z_arm64_ptable_ipi(void)
 }
 #endif
 
+#ifdef CONFIG_FPU_SHARING
+void flush_fpu_ipi_handler(const void *unused)
+{
+	ARG_UNUSED(unused);
+
+	disable_irq();
+	z_arm64_flush_local_fpu();
+	/* no need to re-enable IRQs here */
+}
+
+void z_arm64_flush_fpu_ipi(unsigned int cpu)
+{
+	const uint64_t mpidr = GET_MPIDR();
+
+	gic_raise_sgi(SGI_FPU_IPI, mpidr, (1 << cpu));
+}
+#endif
+
 static int arm64_smp_init(const struct device *dev)
 {
 	ARG_UNUSED(dev);
@@ -206,6 +228,10 @@ static int arm64_smp_init(const struct device *dev)
 	IRQ_CONNECT(SGI_PTABLE_IPI, IRQ_DEFAULT_PRIORITY, ptable_ipi_handler, NULL, 0);
 	irq_enable(SGI_PTABLE_IPI);
 #endif
+#ifdef CONFIG_FPU_SHARING
+	IRQ_CONNECT(SGI_FPU_IPI, IRQ_DEFAULT_PRIORITY, flush_fpu_ipi_handler, NULL, 0);
+	irq_enable(SGI_FPU_IPI);
+#endif
 
 	return 0;
 }
@@ -125,6 +125,15 @@ SECTION_FUNC(TEXT, z_arm64_sync_exc)
 	mrs	x0, esr_el1
 	lsr	x1, x0, #26
 
+#ifdef CONFIG_FPU_SHARING
+	cmp	x1, #0x07	/* Access to SIMD or floating-point */
+	bne	1f
+	mov	x0, sp
+	bl	z_arm64_fpu_trap
+	b	z_arm64_exit_exc_fpu_done
+1:
+#endif
+
 	cmp	x1, #0x15	/* 0x15 = SVC */
 	bne	inv
 
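
The two immediates compared above are exception-class (EC) codes from ESR_EL1, whose EC field occupies bits [31:26]: 0x07 is a trapped FP/SIMD access (due to CPACR_EL1.FPEN) and 0x15 an SVC from AArch64 state. The same decode in illustrative C:

#include <stdint.h>

#define ESR_EC(esr)        (((esr) >> 26) & 0x3f) /* what "lsr x1, x0, #26" extracts */
#define EC_FP_SIMD_ACCESS  0x07                   /* trapped FP/SIMD access */
#define EC_SVC_AARCH64     0x15                   /* SVC from AArch64 state */

static inline int is_fpu_trap(uint64_t esr)
{
	return ESR_EC(esr) == EC_FP_SIMD_ACCESS;
}
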
@@ -58,6 +58,10 @@ _ASM_FILE_PROLOGUE
 	add	\xreg0, \xreg0, \xreg1
 	msr	tpidrro_el0, \xreg0
 
+#ifdef CONFIG_FPU_SHARING
+	bl	z_arm64_fpu_enter_exc
+#endif
+
 .endm
 
 /*
@@ -209,6 +213,13 @@ SECTION_FUNC(TEXT, z_arm64_serror)
 GTEXT(z_arm64_exit_exc)
 SECTION_FUNC(TEXT, z_arm64_exit_exc)
 
+#ifdef CONFIG_FPU_SHARING
+	bl	z_arm64_fpu_exit_exc
+
+GTEXT(z_arm64_exit_exc_fpu_done)
+SECTION_FUNC(TEXT, z_arm64_exit_exc_fpu_done)
+#endif
+
 	ldp	x0, x1, [sp, ___esf_t_spsr_elr_OFFSET]
 	msr	spsr_el1, x0
 	msr	elr_el1, x1
@@ -44,6 +44,11 @@ extern void z_arm64_userspace_enter(z_arch_esf_t *esf, uintptr_t sp_el0);
 extern void z_arm64_set_ttbr0(uintptr_t ttbr0);
 extern void z_arm64_ptable_ipi(void);
 
+#ifdef CONFIG_FPU_SHARING
+void z_arm64_flush_local_fpu(void);
+void z_arm64_flush_fpu_ipi(unsigned int cpu);
+#endif
+
 #endif /* _ASMLANGUAGE */
 
 #ifdef __cplusplus
@@ -133,6 +133,32 @@
 If an ARM thread does not require use of the floating point registers any
 more, it can call :c:func:`k_float_disable`. This instructs the kernel
 not to save or restore its FP context during thread context switching.
 
+ARM64 architecture
+------------------
+
+.. note::
+   The Shared FP registers mode is the default Floating Point
+   Services mode on ARM64. The compiler is free to optimize code
+   using FP/SIMD registers, and library functions such as memcpy
+   are known to make use of them.
+
+On the ARM64 (AArch64) architecture the kernel treats each thread as an FPU
+user on a case-by-case basis. A "lazy save" algorithm is used during context
+switching which updates the floating point registers only when it is absolutely
+necessary. For example, the registers are *not* saved when switching from an
+FPU user to a non-user thread, and then back to the original FPU user.
+
+FPU register usage by ISRs is supported although not recommended. When an
+ISR uses floating point or SIMD registers, the access is trapped, the current
+FPU user context is saved in the thread object, and the ISR is resumed with
+interrupts disabled so as to prevent another IRQ from interrupting the ISR
+and potentially requesting FPU usage. Because ISRs don't have a persistent
+register context, there is no provision for saving an ISR's FPU context
+either; hence the IRQ disabling.
+
+Each thread object becomes 512 bytes larger when Shared FP registers mode
+is enabled.
+
 ARCv2 architecture
 ------------------
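
To make the documented behavior concrete, here is a minimal application-level sketch (hypothetical, assuming CONFIG_FPU and CONFIG_FPU_SHARING are enabled): both threads freely use floating point, and the kernel moves FPU state only when register ownership actually changes hands.

#include <zephyr.h>

static double harmonic(int n)
{
	double sum = 0.0;

	for (int i = 1; i <= n; i++) {
		sum += 1.0 / i;		/* compiles to FP/SIMD register use */
	}
	return sum;
}

static void worker(void *p1, void *p2, void *p3)
{
	volatile double r = 0.0;

	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	for (;;) {
		r = r + harmonic(1000);
		k_yield();	/* no FPU save/restore happens here unless needed */
	}
}

K_THREAD_DEFINE(fp_a, 1024, worker, NULL, NULL, NULL, 5, 0, 0);
K_THREAD_DEFINE(fp_b, 1024, worker, NULL, NULL, NULL, 5, 0, 0);
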
@@ -9,7 +9,9 @@
 
 /* Per CPU architecture specifics */
 struct _cpu_arch {
-	/* content coming soon */
+#ifdef CONFIG_FPU_SHARING
+	struct k_thread *fpu_owner;
+#endif
 };
 
 #endif /* ZEPHYR_INCLUDE_ARM64_STRUCTS_H_ */
@@ -40,9 +40,20 @@ struct _callee_saved {
 
 typedef struct _callee_saved _callee_saved_t;
 
+struct z_arm64_fp_context {
+	__int128 q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7;
+	__int128 q8,  q9,  q10, q11, q12, q13, q14, q15;
+	__int128 q16, q17, q18, q19, q20, q21, q22, q23;
+	__int128 q24, q25, q26, q27, q28, q29, q30, q31;
+	uint32_t fpsr, fpcr;
+};
+
 struct _thread_arch {
 #ifdef CONFIG_USERSPACE
 	struct arm_mmu_ptables *ptables;
 #endif
+#ifdef CONFIG_FPU_SHARING
+	struct z_arm64_fp_context saved_fp_context;
+#endif
 	uint8_t exception_depth;
 };
@@ -116,6 +116,17 @@ static void cpu_hold(void *arg1, void *arg2, void *arg3)
 
 	k_sem_give(&cpuhold_sem);
 
+#if defined(CONFIG_ARM64) && defined(CONFIG_FPU_SHARING)
+	/*
+	 * We'll be spinning with IRQs disabled. The flush-your-FPU request
+	 * IPI will never be serviced during that time. Therefore we flush
+	 * the FPU preemptively here to prevent any other CPU waiting after
+	 * this CPU forever and deadlock the system.
+	 */
+	extern void z_arm64_flush_local_fpu(void);
+	z_arm64_flush_local_fpu();
+#endif
+
 	while (cpuhold_active) {
 		k_busy_wait(1000);
 	}
@@ -83,6 +83,21 @@ struct fp_non_volatile_register_set {
 	float s[16];
 };
 
 #define SIZEOF_FP_VOLATILE_REGISTER_SET \
 	sizeof(struct fp_volatile_register_set)
 #define SIZEOF_FP_NON_VOLATILE_REGISTER_SET \
 	sizeof(struct fp_non_volatile_register_set)
 
+#elif defined(CONFIG_ARM64)
+
+struct fp_volatile_register_set {
+	__int128 regs[16];	/* q0..q15 */
+};
+
+struct fp_non_volatile_register_set {
+	__int128 regs[16];	/* q16..q31 */
+};
+
+#define SIZEOF_FP_VOLATILE_REGISTER_SET \
+	sizeof(struct fp_volatile_register_set)
+#define SIZEOF_FP_NON_VOLATILE_REGISTER_SET \
tests/kernel/fpu_sharing/generic/src/float_regs_arm64_gcc.h (new file)
@@ -0,0 +1,116 @@
/**
 * @file
 * @brief ARM64 GCC specific floating point register macros
 */

/*
 * Copyright (c) 2021 BayLibre SAS
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef _FLOAT_REGS_ARM64_GCC_H
#define _FLOAT_REGS_ARM64_GCC_H

#include <toolchain.h>
#include "float_context.h"

/**
 *
 * @brief Load all floating point registers
 *
 * This function loads ALL floating point registers pointed to by @a regs.
 * It is expected that a subsequent call to _store_all_float_registers()
 * will be issued to dump the floating point registers to memory.
 *
 * The format/organization of 'struct fp_register_set' is not important;
 * the generic C test code (main.c) merely treats the register set as an
 * array of bytes.
 *
 * The only requirement is that the arch specific implementations of
 * _load_all_float_registers() and _store_all_float_registers() agree
 * on the format.
 *
 * @return N/A
 */
static inline void _load_all_float_registers(struct fp_register_set *regs)
{
	__asm__ volatile (
		"ldp	q0, q1, [x0, #(16 * 0)]\n\t"
		"ldp	q2, q3, [x0, #(16 * 2)]\n\t"
		"ldp	q4, q5, [x0, #(16 * 4)]\n\t"
		"ldp	q6, q7, [x0, #(16 * 6)]\n\t"
		"ldp	q8, q9, [x0, #(16 * 8)]\n\t"
		"ldp	q10, q11, [x0, #(16 * 10)]\n\t"
		"ldp	q12, q13, [x0, #(16 * 12)]\n\t"
		"ldp	q14, q15, [x0, #(16 * 14)]\n\t"
		"ldp	q16, q17, [x0, #(16 * 16)]\n\t"
		"ldp	q18, q19, [x0, #(16 * 18)]\n\t"
		"ldp	q20, q21, [x0, #(16 * 20)]\n\t"
		"ldp	q22, q23, [x0, #(16 * 22)]\n\t"
		"ldp	q24, q25, [x0, #(16 * 24)]\n\t"
		"ldp	q26, q27, [x0, #(16 * 26)]\n\t"
		"ldp	q28, q29, [x0, #(16 * 28)]\n\t"
		"ldp	q30, q31, [x0, #(16 * 30)]"
		:
		: "r" (regs)
	);
}
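
Note that the asm above hard-codes x0 while only telling the compiler "r" (regs); that works because regs arrives in x0 per the AAPCS64 calling convention, but it is not guaranteed once the function is inlined with regs held elsewhere. A reduced sketch of a more constraint-robust variant (hypothetical, not part of the commit), pinning the pointer and declaring clobbers:

#include <stdint.h>

struct two_qregs {
	__int128 q[2];
};

static inline void load_q0_q1(const struct two_qregs *regs)
{
	/* Pin the pointer to x0 so the register named in the asm body is
	 * guaranteed correct even after inlining; the clobber list tells
	 * the compiler which SIMD registers the asm overwrites.
	 */
	register const struct two_qregs *p __asm__("x0") = regs;

	__asm__ volatile (
		"ldp	q0, q1, [x0]"
		:
		: "r" (p)
		: "memory", "v0", "v1"
	);
}
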

/**
 *
 * @brief Dump all floating point registers to memory
 *
 * This function stores ALL floating point registers to the memory buffer
 * specified by @a regs. It is expected that a previous invocation of
 * _load_all_float_registers() occurred to load all the floating point
 * registers from a memory buffer.
 *
 * @return N/A
 */
static inline void _store_all_float_registers(struct fp_register_set *regs)
{
	__asm__ volatile (
		"stp	q0, q1, [x0, #(16 * 0)]\n\t"
		"stp	q2, q3, [x0, #(16 * 2)]\n\t"
		"stp	q4, q5, [x0, #(16 * 4)]\n\t"
		"stp	q6, q7, [x0, #(16 * 6)]\n\t"
		"stp	q8, q9, [x0, #(16 * 8)]\n\t"
		"stp	q10, q11, [x0, #(16 * 10)]\n\t"
		"stp	q12, q13, [x0, #(16 * 12)]\n\t"
		"stp	q14, q15, [x0, #(16 * 14)]\n\t"
		"stp	q16, q17, [x0, #(16 * 16)]\n\t"
		"stp	q18, q19, [x0, #(16 * 18)]\n\t"
		"stp	q20, q21, [x0, #(16 * 20)]\n\t"
		"stp	q22, q23, [x0, #(16 * 22)]\n\t"
		"stp	q24, q25, [x0, #(16 * 24)]\n\t"
		"stp	q26, q27, [x0, #(16 * 26)]\n\t"
		"stp	q28, q29, [x0, #(16 * 28)]\n\t"
		"stp	q30, q31, [x0, #(16 * 30)]"
		:
		: "r" (regs)
		: "memory"
	);
}

/**
 *
 * @brief Load then dump all float registers to memory
 *
 * This function loads ALL floating point registers from the memory buffer
 * specified by @a regs, and then stores them back to that buffer.
 *
 * This routine is called by a high priority thread prior to calling a primitive
 * that pends and triggers a co-operative context switch to a low priority
 * thread.
 *
 * @return N/A
 */
static inline void _load_then_store_all_float_registers(
						struct fp_register_set *regs)
{
	_load_all_float_registers(regs);
	_store_all_float_registers(regs);
}
#endif /* _FLOAT_REGS_ARM64_GCC_H */
@@ -50,6 +50,12 @@
 #else
 #include "float_regs_arm_other.h"
 #endif /* __GNUC__ */
+#elif defined(CONFIG_ARM64)
+#if defined(__GNUC__)
+#include "float_regs_arm64_gcc.h"
+#else
+#include "float_regs_arm64_other.h"
+#endif /* __GNUC__ */
 #elif defined(CONFIG_ISA_ARCV2)
 #if defined(__GNUC__)
 #include "float_regs_arc_gcc.h"
@@ -84,7 +90,7 @@ static volatile unsigned int load_store_low_count;
 static volatile unsigned int load_store_high_count;
 
 /* Indicates that the load/store test exited */
-static bool test_exited;
+static volatile bool test_exited;
 
 /* Semaphore for signaling end of test */
 static K_SEM_DEFINE(test_exit_sem, 0, 1);
@@ -52,7 +52,7 @@ static volatile unsigned int calc_pi_low_count;
 static volatile unsigned int calc_pi_high_count;
 
 /* Indicates that the load/store test exited */
-static bool test_exited;
+static volatile bool test_exited;
 
 /* Semaphore for signaling end of test */
 static K_SEM_DEFINE(test_exit_sem, 0, 1);
@@ -13,6 +13,13 @@ tests:
     tags: kernel
     timeout: 600
     min_ram: 16
+  kernel.fpu_sharing.generic.arm64:
+    extra_args: PI_NUM_ITERATIONS=70000
+    arch_allow: arm64
+    filter: CONFIG_CPU_CORTEX_A
+    slow: true
+    tags: kernel
+    timeout: 600
   kernel.fpu_sharing.generic.riscv32:
     extra_args: PI_NUM_ITERATIONS=500
     filter: CONFIG_CPU_HAS_FPU
@@ -16,6 +16,15 @@
 #define TEST_NESTED_ISR
 #endif
 
+#if defined(CONFIG_ARM64) && defined(CONFIG_FPU_SHARING)
+/*
+ * The various log outputs trigger FP access due to the va_list used by
+ * printk() and friends. IRQs are masked to prevent further IRQ nesting
+ * when that happens.
+ */
+#undef TEST_NESTED_ISR
+#endif
+
 #define DURATION 5
 
 #define ISR0_TOKEN 0xDEADBEEF
@@ -131,7 +131,7 @@ static inline void set_fault_valid(bool valid)
 
 
-#if defined(CONFIG_X86_64)
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
 #define TEST_HEAP_SIZE	(2 << CONFIG_MAX_THREAD_BYTES) * 1024
 #define MAX_OBJ 512
 #else