riscv: Add support for floating point

This change adds full shared floating point support for the RISCV architecture with minimal impact on threads with floating point support not enabled. Signed-off-by: Corey Wharton <coreyw7@fb.com>
2020-03-11 18:15:29 -07:00 · 2020-03-11 18:15:29 -07:00 · 58232d58e0
parent 7dd4cc5821
commit 58232d58e0
14 changed files with 667 additions and 18 deletions
--- a/arch/Kconfig
+++ b/arch/Kconfig
@ -516,7 +516,7 @@ config MPU_GAP_FILLING
 menuconfig FLOAT
 	bool "Floating point"
 	depends on CPU_HAS_FPU
-	depends on ARM || X86 || ARC
+	depends on ARC || ARM || RISCV || X86
 	help
 	  This option allows threads to use the floating point registers.
 	  By default, only a single thread may use the registers.
@ -531,6 +531,13 @@ config FP_SHARING
 	  This option allows multiple threads to use the floating point
 	  registers.

+config FLOAT_64BIT
+	bool "Double precision floating point"
+	depends on FLOAT
+	depends on RISCV
+	help
+	  This option indicates that the floating point registers are 64 bits wide.
+
 config ARCH
 	string
 	help
--- a/arch/riscv/core/isr.S
+++ b/arch/riscv/core/isr.S
@ -9,6 +9,62 @@
 #include <linker/sections.h>
 #include <offsets_short.h>
 #include <arch/cpu.h>
+#include <sys/util.h>
+#include <kernel.h>
+
+/* Convenience macros for loading/storing register states. */
+
+/*
+ * Apply 'op' to each caller-saved floating-point register (ft0-ft11 and
+ * fa0-fa7), addressing that register's z_arch_esf_t slot relative to the
+ * base register 'reg'.
+ */
+#define DO_FP_CALLER_SAVED(op, reg) \
+	op ft0, __z_arch_esf_t_ft0_OFFSET(reg)	 ;\
+	op ft1, __z_arch_esf_t_ft1_OFFSET(reg)	 ;\
+	op ft2, __z_arch_esf_t_ft2_OFFSET(reg)	 ;\
+	op ft3, __z_arch_esf_t_ft3_OFFSET(reg)	 ;\
+	op ft4, __z_arch_esf_t_ft4_OFFSET(reg)	 ;\
+	op ft5, __z_arch_esf_t_ft5_OFFSET(reg)	 ;\
+	op ft6, __z_arch_esf_t_ft6_OFFSET(reg)	 ;\
+	op ft7, __z_arch_esf_t_ft7_OFFSET(reg)	 ;\
+	op ft8, __z_arch_esf_t_ft8_OFFSET(reg)	 ;\
+	op ft9, __z_arch_esf_t_ft9_OFFSET(reg)	 ;\
+	op ft10, __z_arch_esf_t_ft10_OFFSET(reg) ;\
+	op ft11, __z_arch_esf_t_ft11_OFFSET(reg) ;\
+	op fa0, __z_arch_esf_t_fa0_OFFSET(reg)	 ;\
+	op fa1, __z_arch_esf_t_fa1_OFFSET(reg)	 ;\
+	op fa2, __z_arch_esf_t_fa2_OFFSET(reg)	 ;\
+	op fa3, __z_arch_esf_t_fa3_OFFSET(reg)	 ;\
+	op fa4, __z_arch_esf_t_fa4_OFFSET(reg)	 ;\
+	op fa5, __z_arch_esf_t_fa5_OFFSET(reg)	 ;\
+	op fa6, __z_arch_esf_t_fa6_OFFSET(reg)	 ;\
+	op fa7, __z_arch_esf_t_fa7_OFFSET(reg)	 ;
+
+/* Store ft0-ft11 and fa0-fa7 into the exception stack frame based at 'reg'. */
+#define STORE_FP_CALLER_SAVED(reg) \
+	DO_FP_CALLER_SAVED(RV_OP_STOREFPREG, reg)
+
+/* Reload ft0-ft11 and fa0-fa7 from the exception stack frame based at 'reg'. */
+#define LOAD_FP_CALLER_SAVED(reg) \
+	DO_FP_CALLER_SAVED(RV_OP_LOADFPREG, reg)
+
+/*
+ * Apply 'op' to each callee-saved floating-point register (fs0-fs11),
+ * addressing that register's callee-saved slot in the thread structure
+ * based at 'reg'.
+ */
+#define DO_FP_CALLEE_SAVED(op, reg) \
+	op fs0, _thread_offset_to_fs0(reg)   ;\
+	op fs1, _thread_offset_to_fs1(reg)   ;\
+	op fs2, _thread_offset_to_fs2(reg)   ;\
+	op fs3, _thread_offset_to_fs3(reg)   ;\
+	op fs4, _thread_offset_to_fs4(reg)   ;\
+	op fs5, _thread_offset_to_fs5(reg)   ;\
+	op fs6, _thread_offset_to_fs6(reg)   ;\
+	op fs7, _thread_offset_to_fs7(reg)   ;\
+	op fs8, _thread_offset_to_fs8(reg)   ;\
+	op fs9, _thread_offset_to_fs9(reg)   ;\
+	op fs10, _thread_offset_to_fs10(reg) ;\
+	op fs11, _thread_offset_to_fs11(reg) ;
+
+/*
+ * Save the fcsr value and fs0-fs11 into the thread structure based at 'reg'.
+ * t2 is used as scratch to hold the fcsr value and is overwritten.
+ */
+#define STORE_FP_CALLEE_SAVED(reg) \
+	frcsr t2				       ;\
+	RV_OP_STOREREG t2, _thread_offset_to_fcsr(reg) ;\
+	DO_FP_CALLEE_SAVED(RV_OP_STOREFPREG, reg)
+
+/*
+ * Restore fcsr and fs0-fs11 from the thread structure based at 'reg'.
+ * t2 is used as scratch to hold the fcsr value and is overwritten.
+ */
+#define LOAD_FP_CALLEE_SAVED(reg) \
+	RV_OP_LOADREG t2, _thread_offset_to_fcsr(reg) ;\
+	fscsr x0, t2				      ;\
+	DO_FP_CALLEE_SAVED(RV_OP_LOADFPREG, reg)

 /* imports */
 GDATA(_sw_isr_table)
@ -68,12 +124,7 @@ SECTION_FUNC(exception.entry, __irq_wrapper)
 	/* Allocate space on thread stack to save registers */
 	addi sp, sp, -__z_arch_esf_t_SIZEOF

-	/*
-	 * Save caller-saved registers on current thread stack.
-	 * NOTE: need to be updated to account for floating-point registers
-	 * floating-point registers should be accounted for when corresponding
-	 * config variable is set
-	 */
+	/* Save caller-saved registers on current thread stack. */
 	RV_OP_STOREREG ra, __z_arch_esf_t_ra_OFFSET(sp)
 	RV_OP_STOREREG gp, __z_arch_esf_t_gp_OFFSET(sp)
 	RV_OP_STOREREG tp, __z_arch_esf_t_tp_OFFSET(sp)
@ -93,6 +144,19 @@ SECTION_FUNC(exception.entry, __irq_wrapper)
 	RV_OP_STOREREG a6, __z_arch_esf_t_a6_OFFSET(sp)
 	RV_OP_STOREREG a7, __z_arch_esf_t_a7_OFFSET(sp)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/* Assess whether floating-point registers need to be saved. */
+	la t0, _kernel
+	RV_OP_LOADREG t0, _kernel_offset_to_current(t0)
+	RV_OP_LOADREG t0, _thread_offset_to_user_options(t0)
+	andi t0, t0, K_FP_REGS
+	RV_OP_STOREREG t0, __z_arch_esf_t_fp_state_OFFSET(sp)
+	beqz t0, skip_store_fp_caller_saved
+	STORE_FP_CALLER_SAVED(sp)
+
+skip_store_fp_caller_saved:
+#endif
+
 	/* Save MEPC register */
 	csrr t0, mepc
 	RV_OP_STOREREG t0, __z_arch_esf_t_mepc_OFFSET(sp)
@ -332,6 +396,16 @@ reschedule:
 	RV_OP_STOREREG s10, _thread_offset_to_s10(t1)
 	RV_OP_STOREREG s11, _thread_offset_to_s11(t1)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/* Assess whether floating-point registers need to be saved. */
+	RV_OP_LOADREG t2, _thread_offset_to_user_options(t1)
+	andi t2, t2, K_FP_REGS
+	beqz t2, skip_store_fp_callee_saved
+	STORE_FP_CALLEE_SAVED(t1)
+
+skip_store_fp_callee_saved:
+#endif
+
 	/*
 	 * Save stack pointer of current thread and set the default return value
 	 * of z_swap to _k_neg_eagain for the thread.
@ -366,6 +440,26 @@ reschedule:
 	RV_OP_LOADREG s10, _thread_offset_to_s10(t1)
 	RV_OP_LOADREG s11, _thread_offset_to_s11(t1)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/* Determine if we need to restore floating-point registers. */
+	RV_OP_LOADREG t2, _thread_offset_to_user_options(t1)
+	andi t2, t2, K_FP_REGS
+	beqz t2, skip_load_fp_callee_saved
+
+	/*
+	 * If we are switching from a thread with floating-point disabled the
+	 * mstatus FS bits will still be cleared, which can cause an illegal
+	 * instruction fault. Set the FS state before restoring the registers.
+	 * mstatus will be restored later on.
+	 */
+	li t2, MSTATUS_FS_INIT
+	csrrs x0, mstatus, t2
+
+	LOAD_FP_CALLEE_SAVED(t1)
+
+skip_load_fp_callee_saved:
+#endif
+
 #ifdef CONFIG_EXECUTION_BENCHMARKING
 	addi sp, sp, -__z_arch_esf_t_SIZEOF

@ -388,6 +482,17 @@ reschedule:
 	RV_OP_STOREREG a6, __z_arch_esf_t_a6_OFFSET(sp)
 	RV_OP_STOREREG a7, __z_arch_esf_t_a7_OFFSET(sp)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/*
+	 * Assess whether floating-point registers need to be saved.
+	 *
+	 * The thread options live in the thread structure, not in the stack
+	 * frame that sp points to, so the thread pointer must be the base
+	 * register here. t1 is the thread base used by the callee-saved
+	 * restore earlier in this routine.
+	 * NOTE(review): confirm t1 is not overwritten by the intervening
+	 * caller-saved register stores.
+	 */
+	RV_OP_LOADREG t2, _thread_offset_to_user_options(t1)
+	andi t2, t2, K_FP_REGS
+	/* Record the decision in the frame so the restore path can mirror it. */
+	RV_OP_STOREREG t2, __z_arch_esf_t_fp_state_OFFSET(sp)
+	beqz t2, skip_store_fp_caller_saved_benchmark
+	STORE_FP_CALLER_SAVED(sp)
+
+skip_store_fp_caller_saved_benchmark:
+#endif
+
 	call read_timer_end_of_swap

 	RV_OP_LOADREG ra, __z_arch_esf_t_ra_OFFSET(sp)
@ -409,6 +514,15 @@ reschedule:
 	RV_OP_LOADREG a6, __z_arch_esf_t_a6_OFFSET(sp)
 	RV_OP_LOADREG a7, __z_arch_esf_t_a7_OFFSET(sp)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/* Determine if we need to restore floating-point registers. */
+	RV_OP_LOADREG t2, __z_arch_esf_t_fp_state_OFFSET(sp)
+	beqz t2, skip_load_fp_caller_saved_benchmark
+	LOAD_FP_CALLER_SAVED(sp)
+
+skip_load_fp_caller_saved_benchmark:
+#endif
+
 	/* Release stack space */
 	addi sp, sp, __z_arch_esf_t_SIZEOF
 #endif
@ -448,6 +562,15 @@ no_reschedule:
 	RV_OP_LOADREG a6, __z_arch_esf_t_a6_OFFSET(sp)
 	RV_OP_LOADREG a7, __z_arch_esf_t_a7_OFFSET(sp)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	/* Determine if we need to restore floating-point registers. */
+	RV_OP_LOADREG t0, __z_arch_esf_t_fp_state_OFFSET(sp)
+	beqz t0, skip_load_fp_caller_saved
+	LOAD_FP_CALLER_SAVED(sp)
+
+skip_load_fp_caller_saved:
+#endif
+
 	/* Release stack space */
 	addi sp, sp, __z_arch_esf_t_SIZEOF

--- a/arch/riscv/core/offsets/offsets.c
+++ b/arch/riscv/core/offsets/offsets.c
@ -43,6 +43,22 @@ GEN_OFFSET_SYM(_callee_saved_t, s9);
 GEN_OFFSET_SYM(_callee_saved_t, s10);
 GEN_OFFSET_SYM(_callee_saved_t, s11);

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+GEN_OFFSET_SYM(_callee_saved_t, fcsr);
+GEN_OFFSET_SYM(_callee_saved_t, fs0);
+GEN_OFFSET_SYM(_callee_saved_t, fs1);
+GEN_OFFSET_SYM(_callee_saved_t, fs2);
+GEN_OFFSET_SYM(_callee_saved_t, fs3);
+GEN_OFFSET_SYM(_callee_saved_t, fs4);
+GEN_OFFSET_SYM(_callee_saved_t, fs5);
+GEN_OFFSET_SYM(_callee_saved_t, fs6);
+GEN_OFFSET_SYM(_callee_saved_t, fs7);
+GEN_OFFSET_SYM(_callee_saved_t, fs8);
+GEN_OFFSET_SYM(_callee_saved_t, fs9);
+GEN_OFFSET_SYM(_callee_saved_t, fs10);
+GEN_OFFSET_SYM(_callee_saved_t, fs11);
+#endif
+
 /* esf member offsets */
 GEN_OFFSET_SYM(z_arch_esf_t, ra);
 GEN_OFFSET_SYM(z_arch_esf_t, gp);
@ -66,6 +82,30 @@ GEN_OFFSET_SYM(z_arch_esf_t, a7);
 GEN_OFFSET_SYM(z_arch_esf_t, mepc);
 GEN_OFFSET_SYM(z_arch_esf_t, mstatus);

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+GEN_OFFSET_SYM(z_arch_esf_t, fp_state);
+GEN_OFFSET_SYM(z_arch_esf_t, ft0);
+GEN_OFFSET_SYM(z_arch_esf_t, ft1);
+GEN_OFFSET_SYM(z_arch_esf_t, ft2);
+GEN_OFFSET_SYM(z_arch_esf_t, ft3);
+GEN_OFFSET_SYM(z_arch_esf_t, ft4);
+GEN_OFFSET_SYM(z_arch_esf_t, ft5);
+GEN_OFFSET_SYM(z_arch_esf_t, ft6);
+GEN_OFFSET_SYM(z_arch_esf_t, ft7);
+GEN_OFFSET_SYM(z_arch_esf_t, ft8);
+GEN_OFFSET_SYM(z_arch_esf_t, ft9);
+GEN_OFFSET_SYM(z_arch_esf_t, ft10);
+GEN_OFFSET_SYM(z_arch_esf_t, ft11);
+GEN_OFFSET_SYM(z_arch_esf_t, fa0);
+GEN_OFFSET_SYM(z_arch_esf_t, fa1);
+GEN_OFFSET_SYM(z_arch_esf_t, fa2);
+GEN_OFFSET_SYM(z_arch_esf_t, fa3);
+GEN_OFFSET_SYM(z_arch_esf_t, fa4);
+GEN_OFFSET_SYM(z_arch_esf_t, fa5);
+GEN_OFFSET_SYM(z_arch_esf_t, fa6);
+GEN_OFFSET_SYM(z_arch_esf_t, fa7);
+#endif
+
 #if defined(CONFIG_RISCV_SOC_CONTEXT_SAVE)
 GEN_OFFSET_SYM(z_arch_esf_t, soc_context);
 #endif
--- a/arch/riscv/core/reset.S
+++ b/arch/riscv/core/reset.S
@ -46,6 +46,20 @@ loop_slave_core:

 boot_master_core:

+#ifdef CONFIG_FLOAT
+	/*
+	 * Enable floating-point.
+	 */
+	li  t0, MSTATUS_FS_INIT
+	csrrs x0, mstatus, t0
+
+	/*
+	 * Floating-point rounding mode set to IEEE-754 default, and clear
+	 * all exception flags.
+	 */
+	fscsr x0, x0
+#endif
+
 #ifdef CONFIG_INIT_STACKS
 	/* Pre-populate all bytes in z_interrupt_stacks with 0xAA */
 	la t0, z_interrupt_stacks
--- a/arch/riscv/core/thread.c
+++ b/arch/riscv/core/thread.c
@ -8,9 +8,9 @@
 #include <ksched.h>

 void z_thread_entry_wrapper(k_thread_entry_t thread,
-			   void *arg1,
-			   void *arg2,
-			   void *arg3);
+			    void *arg1,
+			    void *arg2,
+			    void *arg3);

 void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 		     size_t stack_size, k_thread_entry_t thread_func,
@ -25,8 +25,8 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,

 	/* Initial stack frame for thread */
 	stack_init = (struct __esf *)
-		Z_STACK_PTR_ALIGN(stack_memory +
-				 stack_size - sizeof(struct __esf));
+		     Z_STACK_PTR_ALIGN(stack_memory +
+				       stack_size - sizeof(struct __esf));

 	/* Setup the initial stack frame */
 	stack_init->a0 = (ulong_t)thread_func;
@ -57,7 +57,78 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
 	 *    thread stack.
 	 */
 	stack_init->mstatus = MSTATUS_DEF_RESTORE;
+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	if ((thread->base.user_options & K_FP_REGS) != 0) {
+		stack_init->mstatus |= MSTATUS_FS_INIT;
+	}
+	stack_init->fp_state = 0;
+#endif
 	stack_init->mepc = (ulong_t)z_thread_entry_wrapper;

 	thread->callee_saved.sp = (ulong_t)stack_init;
 }
+
+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+/*
+ * Disable floating-point use for the calling thread.
+ *
+ * Clears K_FP_REGS in the thread's user options and clears the mstatus FS
+ * bits, both with interrupts locked.
+ *
+ * Returns 0 on success, or -EINVAL when 'thread' is not the current thread
+ * or when called from an ISR.
+ */
+int arch_float_disable(struct k_thread *thread)
+{
+	unsigned int key;
+
+	if (thread != _current) {
+		return -EINVAL;
+	}
+
+	if (arch_is_in_isr()) {
+		return -EINVAL;
+	}
+
+	/* Ensure a preemptive context switch does not occur */
+	key = irq_lock();
+
+	/* Disable all floating point capabilities for the thread */
+	thread->base.user_options &= ~K_FP_REGS;
+
+	/*
+	 * Clear the FS bits to disable the FPU. The mask is consumed straight
+	 * from its operand register; copying it into a hard-coded t0 would
+	 * overwrite a register the compiler has not been told about.
+	 */
+	__asm__ volatile (
+		"csrrc x0, mstatus, %0\n"
+		:
+		: "r" (MSTATUS_FS_MASK)
+		);
+
+	irq_unlock(key);
+
+	return 0;
+}
+
+
+/*
+ * Enable floating-point use for the calling thread.
+ *
+ * Sets K_FP_REGS in the thread's user options and sets the mstatus FS bits
+ * given by MSTATUS_FS_INIT, both with interrupts locked.
+ *
+ * Returns 0 on success, or -EINVAL when 'thread' is not the current thread
+ * or when called from an ISR.
+ */
+int arch_float_enable(struct k_thread *thread)
+{
+	unsigned int key;
+
+	if (thread != _current) {
+		return -EINVAL;
+	}
+
+	if (arch_is_in_isr()) {
+		return -EINVAL;
+	}
+
+	/* Ensure a preemptive context switch does not occur */
+	key = irq_lock();
+
+	/* Enable all floating point capabilities for the thread. */
+	thread->base.user_options |= K_FP_REGS;
+
+	/*
+	 * Set the FS bits to Initial to enable the FPU. The value is consumed
+	 * straight from its operand register; copying it into a hard-coded t0
+	 * would overwrite a register the compiler has not been told about.
+	 */
+	__asm__ volatile (
+		"csrrs x0, mstatus, %0\n"
+		:
+		: "r" (MSTATUS_FS_INIT)
+		);
+
+	irq_unlock(key);
+
+	return 0;
+}
+#endif /* CONFIG_FLOAT && CONFIG_FP_SHARING */
--- a/arch/riscv/include/offsets_short_arch.h
+++ b/arch/riscv/include/offsets_short_arch.h
@ -59,6 +59,49 @@
 #define _thread_offset_to_swap_return_value \
 	(___thread_t_arch_OFFSET + ___thread_arch_t_swap_return_value_OFFSET)

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+
+#define _thread_offset_to_fcsr \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fcsr_OFFSET)
+
+#define _thread_offset_to_fs0 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs0_OFFSET)
+
+#define _thread_offset_to_fs1 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs1_OFFSET)
+
+#define _thread_offset_to_fs2 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs2_OFFSET)
+
+#define _thread_offset_to_fs3 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs3_OFFSET)
+
+#define _thread_offset_to_fs4 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs4_OFFSET)
+
+#define _thread_offset_to_fs5 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs5_OFFSET)
+
+#define _thread_offset_to_fs6 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs6_OFFSET)
+
+#define _thread_offset_to_fs7 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs7_OFFSET)
+
+#define _thread_offset_to_fs8 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs8_OFFSET)
+
+#define _thread_offset_to_fs9 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs9_OFFSET)
+
+#define _thread_offset_to_fs10 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs10_OFFSET)
+
+#define _thread_offset_to_fs11 \
+	(___thread_t_callee_saved_OFFSET + ___callee_saved_t_fs11_OFFSET)
+
+#endif /* defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING) */
+
 /* end - threads */

 #endif /* ZEPHYR_ARCH_RISCV_INCLUDE_OFFSETS_SHORT_ARCH_H_ */
--- a/doc/reference/kernel/other/float.rst
+++ b/doc/reference/kernel/other/float.rst
@ -144,6 +144,38 @@ context of threads that are not using the FP registers. An extra 16 bytes
 (single floating point hardware) or 32 bytes (double floating point hardware)
 of stack space is required to load and store floating point registers.

+RISC-V architecture
+-------------------
+
+On the RISC-V architecture, the kernel treats each thread as either a
+non-user or a user of the FPU. A thread that uses the FPU must be tagged
+by one of the following techniques:
+
+* A statically-created RISC-V thread can be tagged by passing the
+  :c:macro:`K_FP_REGS` option to :c:macro:`K_THREAD_DEFINE`.
+
+* A dynamically-created RISC-V thread can be tagged by passing the
+  :c:macro:`K_FP_REGS` to :cpp:func:`k_thread_create()`.
+
+* A running RISC-V thread can be tagged by calling :cpp:func:`k_float_enable()`.
+  This function can only be called from the thread itself.
+
+If a RISC-V thread no longer requires the use of the floating point registers,
+it can call :cpp:func:`k_float_disable()`. This instructs the kernel not to
+save or restore its FP context during thread context switching. This function
+can only be called from the thread itself.
+
+During thread context switching the RISC-V kernel saves the *callee-saved*
+floating point registers, if the switched-out thread is tagged with
+:c:macro:`K_FP_REGS`. Additionally, the *caller-saved* floating point
+registers are saved on the thread's stack. If the switched-in thread has been
+tagged with :c:macro:`K_FP_REGS`, then the kernel restores the *callee-saved*
+FP registers of the switched-in thread and the *caller-saved* FP context is
+restored from the thread's stack. Thus, the kernel does not save or restore the
+FP context of threads that are not using the FP registers. An extra 84 bytes
+(single floating point hardware) or 164 bytes (double floating point hardware)
+of stack space is required to load and store floating point registers.
+
 x86 architecture
 ----------------

--- a/include/arch/riscv/arch.h
+++ b/include/arch/riscv/arch.h
@ -40,13 +40,24 @@
 #define RV_REGSHIFT 2
 #endif

+#ifdef CONFIG_FLOAT_64BIT
+#define RV_OP_LOADFPREG fld
+#define RV_OP_STOREFPREG fsd
+#else
+#define RV_OP_LOADFPREG flw
+#define RV_OP_STOREFPREG fsw
+#endif
+
 /* Common mstatus bits. All supported cores today have the same
 * layouts.
 */

-#define MSTATUS_IEN	(1UL << 3)
-#define MSTATUS_MPP_M	(3UL << 11)
+#define MSTATUS_IEN     (1UL << 3)
+#define MSTATUS_MPP_M   (3UL << 11)
 #define MSTATUS_MPIE_EN (1UL << 7)
+#define MSTATUS_FS_INIT (1UL << 13)
+#define MSTATUS_FS_MASK ((1UL << 13) | (1UL << 14))
+

 /* This comes from openisa_rv32m1, but doesn't seem to hurt on other
 * platforms:
--- a/include/arch/riscv/exp.h
+++ b/include/arch/riscv/exp.h
@ -41,6 +41,14 @@ struct soc_esf {
 };
 #endif

+#if !defined(RV_FP_TYPE) && defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+#ifdef CONFIG_FLOAT_64BIT
+#define RV_FP_TYPE u64_t
+#else
+#define RV_FP_TYPE u32_t
+#endif
+#endif
+
 struct __esf {
 	ulong_t ra;		/* return address */
 	ulong_t gp;		/* global pointer */
@ -66,6 +74,30 @@ struct __esf {
 	ulong_t mepc;		/* machine exception program counter */
 	ulong_t mstatus;	/* machine status register */

+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	ulong_t fp_state;	/* Floating-point saved context state. */
+	RV_FP_TYPE ft0;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft1;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft2;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft3;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft4;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft5;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft6;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft7;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft8;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft9;		/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft10;	/* Caller-saved temporary floating register */
+	RV_FP_TYPE ft11;	/* Caller-saved temporary floating register */
+	RV_FP_TYPE fa0;		/* function argument/return value */
+	RV_FP_TYPE fa1;		/* function argument/return value */
+	RV_FP_TYPE fa2;		/* function argument */
+	RV_FP_TYPE fa3;		/* function argument */
+	RV_FP_TYPE fa4;		/* function argument */
+	RV_FP_TYPE fa5;		/* function argument */
+	RV_FP_TYPE fa6;		/* function argument */
+	RV_FP_TYPE fa7;		/* function argument */
+#endif
+
 #ifdef CONFIG_RISCV_SOC_CONTEXT_SAVE
 	struct soc_esf soc_context;
 #endif
--- a/include/arch/riscv/thread.h
+++ b/include/arch/riscv/thread.h
@ -22,6 +22,14 @@
 #ifndef _ASMLANGUAGE
 #include <zephyr/types.h>

+#if !defined(RV_FP_TYPE) && defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+#ifdef CONFIG_FLOAT_64BIT
+#define RV_FP_TYPE u64_t
+#else
+#define RV_FP_TYPE u32_t
+#endif
+#endif
+
 /*
 * The following structure defines the list of registers that need to be
 * saved/restored when a cooperative context switch occurs.
@ -41,6 +49,22 @@ struct _callee_saved {
 	ulong_t s9;	/* saved register */
 	ulong_t s10;	/* saved register */
 	ulong_t s11;	/* saved register */
+
+#if defined(CONFIG_FLOAT) && defined(CONFIG_FP_SHARING)
+	u32_t fcsr;		/* Control and status register */
+	RV_FP_TYPE fs0;		/* saved floating-point register */
+	RV_FP_TYPE fs1;		/* saved floating-point register */
+	RV_FP_TYPE fs2;		/* saved floating-point register */
+	RV_FP_TYPE fs3;		/* saved floating-point register */
+	RV_FP_TYPE fs4;		/* saved floating-point register */
+	RV_FP_TYPE fs5;		/* saved floating-point register */
+	RV_FP_TYPE fs6;		/* saved floating-point register */
+	RV_FP_TYPE fs7;		/* saved floating-point register */
+	RV_FP_TYPE fs8;		/* saved floating-point register */
+	RV_FP_TYPE fs9;		/* saved floating-point register */
+	RV_FP_TYPE fs10;	/* saved floating-point register */
+	RV_FP_TYPE fs11;	/* saved floating-point register */
+#endif
 };
 typedef struct _callee_saved _callee_saved_t;

--- a/tests/kernel/fp_sharing/float_disable/src/k_float_disable.c
+++ b/tests/kernel/fp_sharing/float_disable/src/k_float_disable.c
@ -14,7 +14,7 @@
 */
 #define PRIORITY  K_PRIO_COOP(0)

-#if defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) || defined(CONFIG_RISCV)
 #define K_FP_OPTS K_FP_REGS
 #elif defined(CONFIG_X86)
 #define K_FP_OPTS (K_FP_REGS | K_SSE_REGS)
@ -32,7 +32,7 @@ static void usr_fp_thread_entry_1(void)
 	k_yield();
 }

-#if defined(CONFIG_ARM) || \
+#if defined(CONFIG_ARM) || defined(CONFIG_RISCV) || \
 	(defined(CONFIG_X86) && defined(CONFIG_LAZY_FP_SHARING))
 #define K_FLOAT_DISABLE_SYSCALL_RETVAL 0
 #else
@ -78,7 +78,7 @@ void test_k_float_disable_common(void)
 		"usr_fp_thread FP options not set (0x%0x)",
 		usr_fp_thread.base.user_options);

-#if defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) || defined(CONFIG_RISCV)
 	/* Verify FP mode can only be disabled for current thread */
 	zassert_true((k_float_disable(&usr_fp_thread) == -EINVAL),
 		"k_float_disable() successful on thread other than current!");
@ -130,7 +130,7 @@ void test_k_float_disable_syscall(void)
 	/* Yield will swap-in usr_fp_thread */
 	k_yield();

-#if defined(CONFIG_ARM) || \
+#if defined(CONFIG_ARM) || defined(CONFIG_RISCV) || \
 	(defined(CONFIG_X86) && defined(CONFIG_LAZY_FP_SHARING))

 	/* Verify K_FP_OPTS are now cleared by the user thread itself */
--- a/tests/kernel/fp_sharing/generic/src/float_context.h
+++ b/tests/kernel/fp_sharing/generic/src/float_context.h
@ -106,6 +106,23 @@ struct fp_non_volatile_register_set {
 #define SIZEOF_FP_VOLATILE_REGISTER_SET sizeof(struct fp_volatile_register_set)
 #define SIZEOF_FP_NON_VOLATILE_REGISTER_SET 0

+#elif defined(CONFIG_RISCV)
+
+/*
+ * Buffer with 32 floating-point register slots; each slot is 64 bits wide
+ * when CONFIG_FLOAT_64BIT is set and 32 bits wide otherwise.
+ * NOTE(review): presumably slot i holds register f<i> as accessed by the
+ * RISC-V load/store helpers - confirm against struct fp_register_set.
+ */
+struct fp_volatile_register_set {
+#ifdef CONFIG_FLOAT_64BIT
+	u64_t fp[32];
+#else
+	u32_t fp[32];
+#endif
+};
+
+/* Left empty: all 32 slots are kept in the volatile set above. */
+struct fp_non_volatile_register_set {
+	/* No non-volatile floating point registers */
+};
+
+#define SIZEOF_FP_VOLATILE_REGISTER_SET sizeof(struct fp_volatile_register_set)
+#define SIZEOF_FP_NON_VOLATILE_REGISTER_SET 0
+
 #else

 #error  "Architecture must provide the following definitions:\n"
--- a/tests/kernel/fp_sharing/generic/src/float_regs_riscv_gcc.h
+++ b/tests/kernel/fp_sharing/generic/src/float_regs_riscv_gcc.h
@ -0,0 +1,229 @@
+/**
+ * @file
+ * @brief RISCV GCC specific floating point register macros
+ */
+
+/*
+ * Copyright (c) 2019, Huang Qi <757509347@qq.com>.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _FLOAT_REGS_RISCV_GCC_H
+#define _FLOAT_REGS_RISCV_GCC_H
+
+#if !defined(__GNUC__) || !defined(CONFIG_RISCV)
+#error __FILE__ goes only with RISCV GCC
+#endif
+
+#include <toolchain.h>
+#include "float_context.h"
+
+/*
+ * Per-register buffer stride in bytes, and the matching store/load
+ * mnemonics (with trailing space) pasted into the inline assembly below.
+ */
+#ifdef CONFIG_FLOAT_64BIT
+#define RV_FPREG_WIDTH 8
+#define RV_FPREG_SAVE "fsd "
+#define RV_FPREG_LOAD "fld "
+#else
+#define RV_FPREG_WIDTH 4
+#define RV_FPREG_SAVE "fsw "
+#define RV_FPREG_LOAD "flw "
+#endif
+
+/**
+ *
+ * @brief Load all floating point registers
+ *
+ * This function loads ALL floating point registers pointed to by @a regs.
+ * It is expected that a subsequent call to _store_all_float_registers()
+ * will be issued to dump the floating point registers to memory.
+ *
+ * The format/organization of 'struct fp_register_set' is arch specific; the
+ * generic C test code (main.c) merely treats the register set as an array of
+ * bytes.
+ *
+ * The only requirement is that the arch specific implementations of
+ * _load_all_float_registers() and _store_all_float_registers() agree
+ * on the format.
+ *
+ * @return N/A
+ */
+static inline void _load_all_float_registers(struct fp_register_set *regs)
+{
+	/*
+	 * t0 walks the buffer in RV_FPREG_WIDTH steps (held in t1), loading
+	 * f0..f31 from consecutive slots.
+	 *
+	 * The "memory" clobber is required because the buffer is read through
+	 * a pointer the compiler cannot see being dereferenced; without it,
+	 * earlier writes to *regs are not guaranteed to be visible to the
+	 * loads. 'volatile' is spelled out to match the store routine.
+	 */
+	__asm__ volatile(
+		"mv  t0, %0\n"
+		"mv  t1, %1\n"
+		RV_FPREG_LOAD "f0, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f1, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f2, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f3, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f4, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f5, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f6, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f7, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f8, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f9, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f10, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f11, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f12, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f13, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f14, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f15, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f16, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f17, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f18, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f19, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f20, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f21, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f22, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f23, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f24, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f25, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f26, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f27, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f28, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f29, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f30, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_LOAD "f31, 0(t0)\n"
+		:
+		: "r"(regs), "r"(RV_FPREG_WIDTH)
+		: "t0", "t1", "memory"
+	);
+}
+
+/**
+ *
+ * @brief Dump all floating point registers to memory
+ *
+ * This function stores ALL floating point registers to the memory buffer
+ * specified by @a regs. It is expected that a previous invocation of
+ * _load_all_float_registers() occurred to load all the floating point
+ * registers from a memory buffer.
+ *
+ * @return N/A
+ */
+
+static inline void _store_all_float_registers(struct fp_register_set *regs)
+{
+	/*
+	 * t0 walks the buffer in RV_FPREG_WIDTH steps (held in t1), storing
+	 * f0..f31 to consecutive slots. The "memory" clobber tells the
+	 * compiler that the buffer contents change.
+	 */
+	__asm__ volatile(
+		"mv t0, %0\n\t"
+		"mv t1, %1\n\t"
+		RV_FPREG_SAVE "f0, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f1, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f2, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f3, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f4, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f5, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f6, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f7, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f8, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f9, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f10, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f11, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f12, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f13, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f14, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f15, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f16, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f17, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f18, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f19, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f20, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f21, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f22, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f23, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f24, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f25, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f26, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f27, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f28, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f29, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f30, 0(t0)\n"
+		"add t0, t0, t1\n"
+		RV_FPREG_SAVE "f31, 0(t0)\n"
+		:
+		: "r"(regs), "r"(RV_FPREG_WIDTH)
+		: "t0", "t1", "memory"
+	);
+}
+
+/**
+ *
+ * @brief Load then dump all float registers to memory
+ *
+ * This function loads ALL floating point registers from the memory buffer
+ * specified by @a regs, and then stores them back to that buffer.
+ *
+ * This routine is called by a high priority thread prior to calling a primitive
+ * that pends and triggers a co-operative context switch to a low priority
+ * thread.
+ *
+ * @param regs buffer the registers are loaded from and then stored back to
+ *
+ * @return N/A
+ */
+
+static inline void _load_then_store_all_float_registers(struct fp_register_set
+							*regs)
+{
+	/* Load first, then store, so the buffer ends up holding what was read. */
+	_load_all_float_registers(regs);
+	_store_all_float_registers(regs);
+}
+#endif /* _FLOAT_REGS_RISCV_GCC_H */
--- a/tests/kernel/fp_sharing/generic/src/load_store.c
+++ b/tests/kernel/fp_sharing/generic/src/load_store.c
@ -56,6 +56,12 @@
 #else
 #include "float_regs_arc_other.h"
 #endif /* __GNUC__ */
+#elif defined(CONFIG_RISCV)
+#if defined(__GNUC__)
+#include "float_regs_riscv_gcc.h"
+#else
+#include "float_regs_riscv_other.h"
+#endif /* __GNUC__ */
 #endif

 #include "float_context.h"