riscv: better abstraction for register-wide FP load/store opcodes

Same rationale as the preceding commit: let's create pseudo-instructions in
assembly scope to make the code more uniform and readable.

Furthermore, the definition of COPY_ESF_FP() was wrong: the width of the
floating-point registers varies according to
CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION, not CONFIG_64BIT. It is therefore
wrong to use lr/sr (previously RV_OP_LOADREG/RV_OP_STOREREG) with a
regular integer temporary register to transfer such content: on a 32-bit
core with a double-precision FPU that copies only half of each 64-bit
register slot. An FP temporary moved with flr/fsr must be used instead.
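
For illustration, a minimal sketch (not part of this diff) of the failure
mode on a 32-bit core with CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION=y, where
lr/sr expand to lw/sw:

	/* broken: integer temp moves only the low 32 bits of a 64-bit slot */
	lr  t1, __z_arch_esf_t_ft0_OFFSET(t0)
	sr  t1, __z_arch_esf_t_ft0_OFFSET(sp)	/* upper half is lost */

	/* correct: flr/fsr expand to fld/fsd, FP temp keeps the full width */
	flr ft0, __z_arch_esf_t_ft0_OFFSET(t0)
	fsr ft0, __z_arch_esf_t_ft0_OFFSET(sp)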

Note: There are far more efficient ways to copy FP context around but
      such optimisations will come separately.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Author: Nicolas Pitre, 2022-02-22 14:29:34 -05:00; committed by Anas Nashif
parent 1fd79b3ef4
commit bfb7919ed0
3 changed files with 70 additions and 56 deletions

@@ -29,3 +29,25 @@
 .endm
 
 #endif
+
+#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
+
+.macro flr, rd, mem
+	fld \rd, \mem
+.endm
+
+.macro fsr, rs, mem
+	fsd \rs, \mem
+.endm
+
+#else
+
+.macro flr, rd, mem
+	flw \rd, \mem
+.endm
+
+.macro fsr, rs, mem
+	fsw \rs, \mem
+.endm
+
+#endif
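
With the pseudo-instructions above in scope, callers no longer need to
know the configured FP width; a hypothetical standalone use (assuming this
file is included) would be:

	fsr fa0, 0(sp)	/* expands to fsd or fsw depending on config */
	flr fa0, 0(sp)	/* expands to fld or flw depending on config */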

@@ -41,10 +41,10 @@
 	op fa7, __z_arch_esf_t_fa7_OFFSET(reg) ;
 
 #define STORE_FP_CALLER_SAVED(reg) \
-	DO_FP_CALLER_SAVED(RV_OP_STOREFPREG, reg)
+	DO_FP_CALLER_SAVED(fsr, reg)
 
 #define LOAD_FP_CALLER_SAVED(reg) \
-	DO_FP_CALLER_SAVED(RV_OP_LOADFPREG, reg)
+	DO_FP_CALLER_SAVED(flr, reg)
 
 #define DO_FP_CALLEE_SAVED(op, reg) \
 	op fs0, _thread_offset_to_fs0(reg) ;\
@@ -63,58 +63,58 @@
 #define STORE_FP_CALLEE_SAVED(reg) \
 	frcsr t2 ;\
 	sw t2, _thread_offset_to_fcsr(reg) ;\
-	DO_FP_CALLEE_SAVED(RV_OP_STOREFPREG, reg)
+	DO_FP_CALLEE_SAVED(fsr, reg)
 
 #define LOAD_FP_CALLEE_SAVED(reg) \
 	lw t2, _thread_offset_to_fcsr(reg) ;\
 	fscsr t2 ;\
-	DO_FP_CALLEE_SAVED(RV_OP_LOADFPREG, reg)
+	DO_FP_CALLEE_SAVED(flr, reg)
 
 #define COPY_ESF_FP_STATE(to_reg, from_reg, temp) \
 	lb temp, __z_arch_esf_t_fp_state_OFFSET(from_reg) ;\
 	sb temp, __z_arch_esf_t_fp_state_OFFSET(to_reg) ;
 
 #define COPY_ESF_FP(to_reg, from_reg, temp) \
-	lr temp, __z_arch_esf_t_ft0_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft0_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft1_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft1_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft2_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft2_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft3_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft3_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft4_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft4_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft5_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft5_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft6_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft6_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft7_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft7_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft8_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft8_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft9_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft9_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft10_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft10_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_ft11_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_ft11_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa0_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa0_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa1_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa1_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa2_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa2_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa3_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa3_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa4_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa4_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa5_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa5_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa6_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa6_OFFSET(to_reg) ;\
-	lr temp, __z_arch_esf_t_fa7_OFFSET(from_reg) ;\
-	sr temp, __z_arch_esf_t_fa7_OFFSET(to_reg) ;
+	flr temp, __z_arch_esf_t_ft0_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft0_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft1_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft1_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft2_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft2_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft3_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft3_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft4_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft4_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft5_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft5_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft6_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft6_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft7_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft7_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft8_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft8_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft9_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft9_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft10_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft10_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_ft11_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_ft11_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa0_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa0_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa1_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa1_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa2_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa2_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa3_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa3_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa4_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa4_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa5_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa5_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa6_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa6_OFFSET(to_reg) ;\
+	flr temp, __z_arch_esf_t_fa7_OFFSET(from_reg) ;\
+	fsr temp, __z_arch_esf_t_fa7_OFFSET(to_reg)
 
 #define COPY_ESF(to_reg, from_reg, temp) \
 	lr temp, __z_arch_esf_t_mepc_OFFSET(from_reg) ;\
@@ -492,7 +492,7 @@ not_user_syscall:
 #if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
 	lb t1, __z_arch_esf_t_fp_state_OFFSET(t0)
 	beqz t1, skip_fp_move_kernel_syscall
-	COPY_ESF_FP(sp, t0, t1)
+	COPY_ESF_FP(sp, t0, ft0)
 skip_fp_move_kernel_syscall:
 	COPY_ESF_FP_STATE(sp, t0, t1)
 #endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
@@ -565,7 +565,7 @@ is_user_syscall:
 #if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
 	lb t1, __z_arch_esf_t_fp_state_OFFSET(t0)
 	beqz t1, skip_fp_copy_user_syscall
-	COPY_ESF_FP(sp, t0, t1)
+	COPY_ESF_FP(sp, t0, ft0)
 skip_fp_copy_user_syscall:
 	COPY_ESF_FP_STATE(sp, t0, t1)
 #endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
@@ -639,7 +639,7 @@ no_reschedule_user_fault:
 #if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
 	lb t1, __z_arch_esf_t_fp_state_OFFSET(t0)
 	beqz t1, skip_fp_copy_return_user_syscall
-	COPY_ESF_FP(sp, t0, t1)
+	COPY_ESF_FP(sp, t0, ft0)
 skip_fp_copy_return_user_syscall:
 	COPY_ESF_FP_STATE(sp, t0, t1)
 #endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
@@ -788,7 +788,7 @@ on_thread_stack:
 #if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
 	lb t1, __z_arch_esf_t_fp_state_OFFSET(t0)
 	beqz t1, skip_fp_move_irq
-	COPY_ESF_FP(sp, t0, t1)
+	COPY_ESF_FP(sp, t0, ft0)
 skip_fp_move_irq:
 	COPY_ESF_FP_STATE(sp, t0, t1)
 #endif /* CONFIG_FPU && CONFIG_FPU_SHARING */

@@ -195,14 +195,6 @@
 #define RV_REGSHIFT 2
 #endif
 
-#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
-#define RV_OP_LOADFPREG fld
-#define RV_OP_STOREFPREG fsd
-#else
-#define RV_OP_LOADFPREG flw
-#define RV_OP_STOREFPREG fsw
-#endif
-
 /* Common mstatus bits. All supported cores today have the same
  * layouts.
  */