From b38445eaa03bf8367674b7fd0a4e9445513868e6 Mon Sep 17 00:00:00 2001 From: Yuval Peress Date: Thu, 6 Oct 2022 23:54:46 -0600 Subject: [PATCH] math: Introduce a DSP basicmath subsystem with a cmsis backend Introduce an API mirroring the CMSIS-DSP's basicmath. If CMSIS_DSP is enabled, then it will by default be used as a backend. Developers may opt into a custom backend by setting CONFIG_DSP_BACKEND_CMSIS=n. If done, the application must provide `zdsp_backend/dsp.h` and optionally implement the functions in its own .c files. Signed-off-by: Yuval Peress --- CODEOWNERS | 1 + MAINTAINERS.yml | 12 +- doc/develop/api/overview.rst | 4 + doc/services/dsp/index.rst | 61 ++ doc/services/index.rst | 1 + doc/zephyr.doxyfile.in | 1 + include/zephyr/dsp/basicmath.h | 920 +++++++++++++++++++++ include/zephyr/dsp/basicmath_f16.h | 124 +++ include/zephyr/dsp/dsp.h | 31 + include/zephyr/dsp/types.h | 71 ++ modules/Kconfig.cmsis | 2 +- modules/Kconfig.cmsis_dsp | 14 + subsys/CMakeLists.txt | 1 + subsys/Kconfig | 2 + subsys/dsp/CMakeLists.txt | 8 + subsys/dsp/Kconfig | 26 + subsys/dsp/cmsis/CMakeLists.txt | 4 + subsys/dsp/cmsis/public/zdsp_backend.h | 280 +++++++ subsys/dsp/cmsis/public/zdsp_backend_f16.h | 75 ++ west.yml | 2 +- 20 files changed, 1637 insertions(+), 3 deletions(-) create mode 100644 doc/services/dsp/index.rst create mode 100644 include/zephyr/dsp/basicmath.h create mode 100644 include/zephyr/dsp/basicmath_f16.h create mode 100644 include/zephyr/dsp/dsp.h create mode 100644 include/zephyr/dsp/types.h create mode 100644 subsys/dsp/CMakeLists.txt create mode 100644 subsys/dsp/Kconfig create mode 100644 subsys/dsp/cmsis/CMakeLists.txt create mode 100644 subsys/dsp/cmsis/public/zdsp_backend.h create mode 100644 subsys/dsp/cmsis/public/zdsp_backend_f16.h diff --git a/CODEOWNERS b/CODEOWNERS index c5383bcc13..018bc0d4cd 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -756,6 +756,7 @@ scripts/build/gen_image_info.py @tejlmand /subsys/debug/gdbstub.c @ceolin /subsys/dfu/ 
@de-nordic @nordicjm /subsys/disk/ @jfischer-no +/subsys/dsp/ @yperess /subsys/tracing/ @nashif /subsys/debug/asan_hacks.c @aescolar @daor-oti /subsys/demand_paging/ @dcpleung @nashif diff --git a/MAINTAINERS.yml b/MAINTAINERS.yml index a70cf9ea50..f66212f550 100644 --- a/MAINTAINERS.yml +++ b/MAINTAINERS.yml @@ -324,6 +324,17 @@ CMSIS API layer: - "area: CMSIS API Layer" - "area: Portability" +DSP subsystem: + status: maintained + maintainers: + - stephanosio + - yperess + files: + - subsys/dsp/ + - tests/subsys/dsp/ + labels: + - "area: DSP" + CMSIS-DSP integration: status: maintained maintainers: @@ -334,7 +345,6 @@ CMSIS-DSP integration: - modules/Kconfig.cmsis_dsp - tests/benchmarks/cmsis_dsp/ - tests/lib/cmsis_dsp/ - - tests/subsys/dsp/ labels: - "area: CMSIS-DSP" diff --git a/doc/develop/api/overview.rst b/doc/develop/api/overview.rst index a7836c388a..b6f686ee6c 100644 --- a/doc/develop/api/overview.rst +++ b/doc/develop/api/overview.rst @@ -316,3 +316,7 @@ between major releases are available in the :ref:`zephyr_release_notes`. * - :ref:`watchdog_api` - Stable - 1.0 + + * - :ref:`zdsp_api` + - Experimental + - 3.3 diff --git a/doc/services/dsp/index.rst b/doc/services/dsp/index.rst new file mode 100644 index 0000000000..6ce6acf384 --- /dev/null +++ b/doc/services/dsp/index.rst @@ -0,0 +1,61 @@ +.. _zdsp_api: + +Digital Signal Processing (DSP) +############################### + +.. contents:: + :local: + :depth: 2 + +The DSP API provides an architecture agnostic way for signal processing. +Currently, the API will work on any architecture but will likely not be +optimized. 
The status of the various architectures can be found below: + ++--------------+-------------+ +| Architecture | Status | ++--------------+-------------+ +| ARC | Unoptimized | +| ARM | Optimized | +| ARM64 | Optimized | +| MIPS | Unoptimized | +| NIOS2 | Unoptimized | +| POSIX | Unoptimized | +| RISCV | Unoptimized | +| RISCV64 | Unoptimized | +| SPARC | Unoptimized | +| X86 | Unoptimized | +| XTENSA | Unoptimized | ++--------------+-------------+ + +Using zDSP +********** + +zDSP provides various backend options which are selected automatically for the +application. By default, including the CMSIS module will enable all +architectures to use the zDSP APIs. This can be done by setting:: + + CONFIG_CMSIS_DSP=y + +If your application requires some additional customization, it's possible to +enable :kconfig:option:`CONFIG_DSP_BACKEND_CUSTOM` which means that the +application is responsible for providing the implementation of the zDSP +library. + +Optimizing for your architecture +******************************** + +If your architecture is showing as ``Unoptimized``, it's possible to add a new +zDSP backend to better support it. To do that, a new Kconfig option should be +added to `subsys/dsp/Kconfig`_ along with the required dependencies and the +``default`` set for ``DSP_BACKEND`` Kconfig choice. + +Next, the implementation should be added at ``subsys/dsp/<backend>/`` and +linked in at `subsys/dsp/CMakeLists.txt`_. + +API Reference +************* + +.. doxygengroup:: math_dsp + +.. _subsys/dsp/Kconfig: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/Kconfig +.. 
_subsys/dsp/CMakeLists.txt: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/CMakeLists.txt diff --git a/doc/services/index.rst b/doc/services/index.rst index 61c1695ac4..675edf935d 100644 --- a/doc/services/index.rst +++ b/doc/services/index.rst @@ -10,6 +10,7 @@ OS Services crypto/index debugging/index.rst device_mgmt/index + dsp/index.rst file_system/index.rst formatted_output.rst ipc/index.rst diff --git a/doc/zephyr.doxyfile.in b/doc/zephyr.doxyfile.in index 79e3c2bf91..600d8ac81b 100644 --- a/doc/zephyr.doxyfile.in +++ b/doc/zephyr.doxyfile.in @@ -2316,6 +2316,7 @@ PREDEFINED = __DOXYGEN__ \ CONFIG_ERRNO \ CONFIG_FLASH_JESD216_API \ CONFIG_FLASH_PAGE_LAYOUT \ + CONFIG_FP16 \ CONFIG_FPU \ CONFIG_FPU_SHARING \ CONFIG_GDBSTUB \ diff --git a/include/zephyr/dsp/basicmath.h b/include/zephyr/dsp/basicmath.h new file mode 100644 index 0000000000..0128b3c727 --- /dev/null +++ b/include/zephyr/dsp/basicmath.h @@ -0,0 +1,920 @@ +/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file zephyr/dsp/basicmath.h + * + * @brief Public APIs for DSP basicmath + */ + +#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_H_ +#define INCLUDE_ZEPHYR_DSP_BASICMATH_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @ingroup math_dsp + * @defgroup math_dsp_basic Basic Math Functions + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_mult Vector Multiplication + * + * Element-by-element multiplication of two vectors. + *
+ *     dst[n] = src_a[n] * src_b[n],   0 <= n < block_size.
+ * 
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types. + * @{ + */ + +/** + * @brief Q7 vector multiplication. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q7 range [0x80 0x7F] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, + uint32_t block_size); + +/** + * @brief Q15 vector multiplication. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size); + +/** + * @brief Q31 vector multiplication. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size); + +/** + * @brief Floating-point vector multiplication. 
+ * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_mult_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_add Vector Addition + * + * Element-by-element addition of two vectors. + *
+ *     dst[n] = src_a[n] + src_b[n],   0 <= n < block_size.
+ * 
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types. + * @{ + */ + +/** + * @brief Floating-point vector addition. + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst, + uint32_t block_size); + +/** + * @brief Q7 vector addition. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q7 range [0x80 0x7F] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, + uint32_t block_size); + +/** + * @brief Q15 vector addition. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size); + +/** + * @brief Q31 vector addition. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. 
+ * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_sub Vector Subtraction + * + * Element-by-element subtraction of two vectors. + *
+ *     dst[n] = src_a[n] - src_b[n],   0 <= n < block_size.
+ * 
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types. + * @{ + */ + +/** + * @brief Floating-point vector subtraction. + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst, + uint32_t block_size); + +/** + * @brief Q7 vector subtraction. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, + uint32_t block_size); + +/** + * @brief Q15 vector subtraction. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size); + +/** + * @brief Q31 vector subtraction. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. 
+ * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_scale Vector Scale + * + * Multiply a vector by a scalar value. For floating-point data, the algorithm used is: + *
+ *     dst[n] = src[n] * scale,   0 <= n < block_size.
+ * 
+ * + * In the fixed-point Q7, Q15, and Q31 functions, scale is represented by a fractional + * multiplication scale_fract and an arithmetic shift shift. The shift + * allows the gain of the scaling operation to exceed 1.0. The algorithm used with fixed-point data + * is: + *
+ *     dst[n] = (src[n] * scale_fract) << shift,   0 <= n < block_size.
+ * 
+ * + * The overall scale factor applied to the fixed-point data is + *
+ *     scale = scale_fract * 2^shift.
+ * 
+ * The functions support in-place computation allowing the source and destination pointers to + * reference the same memory buffer. + * @{ + */ + +/** + * @brief Multiplies a floating-point vector by a scalar. + * @param[in] src points to the input vector + * @param[in] scale scale factor to be applied + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, + uint32_t block_size); + +/** + * @brief Multiplies a Q7 vector by a scalar. + * + * @par Scaling and Overflow Behavior + * The input data *src and scale_fract are in 1.7 format. + * These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to + * 1.7 format. + * + * @param[in] src points to the input vector + * @param[in] scale_fract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst, + uint32_t block_size); + +/** + * @brief Multiplies a Q15 vector by a scalar. + * + * @par Scaling and Overflow Behavior + * The input data *src and scale_fract are in 1.15 format. + * These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to + * 1.15 format. + * + * @param[in] src points to the input vector + * @param[in] scale_fract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst, + uint32_t block_size); + +/** + * @brief Multiplies a Q31 vector by a scalar. 
+ * + * @par Scaling and Overflow Behavior + * The input data *src and scale_fract are in 1.31 format. + * These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to + * 1.31 format. + * + * @param[in] src points to the input vector + * @param[in] scale_fract fractional portion of the scale value + * @param[in] shift number of bits to shift the result by + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_abs Vector Absolute Value + * + * Computes the absolute value of a vector on an element-by-element basis. + *
+ *     dst[n] = abs(src[n]),   0 <= n < block_size.
+ * 
+ * The functions support in-place computation allowing the source and destination pointers to + * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and + * Q31 data types. + * @{ + */ + +/** + * @brief Floating-point vector absolute value. + * @param[in] src points to the input buffer + * @param[out] dst points to the output buffer + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size); + +/** + * @brief Q7 vector absolute value. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F. + * + * @param[in] src points to the input buffer + * @param[out] dst points to the output buffer + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size); + +/** + * @brief Q15 vector absolute value. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF. + * + * @param[in] src points to the input buffer + * @param[out] dst points to the output buffer + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size); + +/** + * @brief Q31 vector absolute value. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value + * 0x7FFFFFFF. 
+ * + * @param[in] src points to the input buffer + * @param[out] dst points to the output buffer + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_dot Vector Dot Product + * + * Computes the dot product of two vectors. The vectors are multiplied element-by-element and then + * summed. + *
+ *     sum = src_a[0]*src_b[0] + src_a[1]*src_b[1] + ... + src_a[block_size-1]*src_b[block_size-1]
+ * 
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types. + * @{ + */ + +/** + * @brief Dot product of floating-point vectors. + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[in] block_size number of samples in each vector + * @param[out] result output result returned here + */ +DSP_FUNC_SCOPE void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b, + uint32_t block_size, float32_t *result); + +/** + * @brief Dot product of Q7 vectors. + * + * @par Scaling and Overflow Behavior + * The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these results are added to + * an accumulator in 18.14 format. Nonsaturating additions are used and there is no danger of wrap + * around as long as the vectors are less than 2^18 elements long. The return result is in 18.14 + * format. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[in] block_size number of samples in each vector + * @param[out] result output result returned here + */ +DSP_FUNC_SCOPE void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size, + q31_t *result); + +/** + * @brief Dot product of Q15 vectors. + * + * @par Scaling and Overflow Behavior + * The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these results are added + * to a 64-bit accumulator in 34.30 format. Nonsaturating additions are used and given that there + * are 33 guard bits in the accumulator there is no risk of overflow. The return result is in + * 34.30 format. 
+ * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[in] block_size number of samples in each vector + * @param[out] result output result returned here + */ +DSP_FUNC_SCOPE void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size, + q63_t *result); + +/** + * @brief Dot product of Q31 vectors. + * + * @par Scaling and Overflow Behavior + * The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these are truncated to + * 2.48 format by discarding the lower 14 bits. The 2.48 result is then added without saturation + * to a 64-bit accumulator in 16.48 format. There are 15 guard bits in the accumulator and there + * is no risk of overflow as long as the length of the vectors is less than 2^16 elements. The + * return result is in 16.48 format. + * + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[in] block_size number of samples in each vector + * @param[out] result output result returned here + */ +DSP_FUNC_SCOPE void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size, + q63_t *result); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_shift Vector Shift + * + * Shifts the elements of a fixed-point vector by a specified number of bits. + * There are separate functions for Q7, Q15, and Q31 data types. The underlying algorithm used is: + *
+ *     dst[n] = src[n] << shift,   0 <= n < block_size.
+ * 
+ * If shift is positive then the elements of the vector are shifted to the left. + * If shift is negative then the elements of the vector are shifted to the right. + * + * The functions support in-place computation allowing the source and destination pointers to + * reference the same memory buffer. + * @{ + */ + +/** + * @brief Shifts the elements of a Q7 vector a specified number of bits. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q7 range [0x80 0x7F] are saturated. + * + * @param[in] src points to the input vector + * @param[in] shift_bits number of bits to shift. A positive value shifts left; a negative value + * shifts right. + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst, + uint32_t block_size); + +/** + * @brief Shifts the elements of a Q15 vector a specified number of bits. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. + * + * @param[in] src points to the input vector + * @param[in] shift_bits number of bits to shift. A positive value shifts left; a negative value + * shifts right. + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst, + uint32_t block_size); + +/** + * @brief Shifts the elements of a Q31 vector a specified number of bits. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. + * + * @param[in] src points to the input vector + * @param[in] shift_bits number of bits to shift. A positive value shifts left; a negative value + * shifts right. 
+ * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_offset Vector Offset + * + * Adds a constant offset to each element of a vector. + *
+ *     dst[n] = src[n] + offset,   0 <= n < block_size.
+ * 
+ * The functions support in-place computation allowing the source and destination pointers to + * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and + * Q31 data types. + * + * @{ + */ + +/** + * @brief Adds a constant offset to a floating-point vector. + * @param[in] src points to the input vector + * @param[in] offset is the offset to be added + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst, + uint32_t block_size); + +/** + * @brief Adds a constant offset to a Q7 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q7 range [0x80 0x7F] are saturated. + * + * @param[in] src points to the input vector + * @param[in] offset is the offset to be added + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size); + +/** + * @brief Adds a constant offset to a Q15 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. + * + * @param[in] src points to the input vector + * @param[in] offset is the offset to be added + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst, + uint32_t block_size); + +/** + * @brief Adds a constant offset to a Q31 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. 
+ * + * @param[in] src points to the input vector + * @param[in] offset is the offset to be added + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_negate Vector Negate + * + * Negates the elements of a vector. + *
+ *     dst[n] = -src[n],   0 <= n < block_size.
+ * 
+ * The functions support in-place computation allowing the source and destination pointers to + * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and + * Q31 data types. + * + * @{ + */ + +/** + * @brief Negates the elements of a floating-point vector. + * @param[in] src points to the input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size); + +/** + * @brief Negates the elements of a Q7 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F. + * + * @param[in] src points to the input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size); + +/** + * @brief Negates the elements of a Q15 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF. + * + * @param[in] src points to the input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size); + +/** + * @brief Negates the elements of a Q31 vector. + * + * @par Scaling and Overflow Behavior + * The function uses saturating arithmetic. + * The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF. 
+ * + * @param[in] src points to the input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_and Vector bitwise AND + * + * Compute the logical bitwise AND. + * + * There are separate functions for uint32_t, uint16_t, and uint8_t data types. + * @{ + */ + +/** + * @brief Compute the logical bitwise AND of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise AND of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise AND of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_or Vector bitwise OR + * + * Compute the logical bitwise OR. + * + * There are separate functions for uint32_t, uint16_t, and uint8_t data types. 
+ * @{ + */ + +/** + * @brief Compute the logical bitwise OR of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise OR of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise OR of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_not Vector bitwise NOT + * + * Compute the logical bitwise NOT. + * + * There are separate functions for uint32_t, uint16_t, and uint8_t data types. + * @{ + */ + +/** + * @brief Compute the logical bitwise NOT of a fixed-point vector. + * @param[in] src points to input vector + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size); + +/** + * @brief Compute the logical bitwise NOT of a fixed-point vector. 
+ * @param[in] src points to input vector + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size); + +/** + * @brief Compute the logical bitwise NOT of a fixed-point vector. + * @param[in] src points to input vector + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_xor Vector bitwise XOR + * + * Compute the logical bitwise XOR. + * + * There are separate functions for uint32_t, uint16_t, and uint8_t data types. + * @{ + */ + +/** + * @brief Compute the logical bitwise XOR of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise XOR of two fixed-point vectors. + * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size); + +/** + * @brief Compute the logical bitwise XOR of two fixed-point vectors. 
+ * @param[in] src_a points to input vector A + * @param[in] src_b points to input vector B + * @param[out] dst points to output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size); + +/** + * @} + */ + +/** + * @ingroup math_dsp_basic + * @addtogroup math_dsp_basic_clip Vector Clipping + * + * Element-by-element clipping of a value. + * + * The value is constrained between 2 bounds. + * + * There are separate functions for floating-point, Q7, Q15, and Q31 data types. + * @{ + */ + +/** + * @brief Elementwise floating-point clipping + * @param[in] src points to input values + * @param[out] dst points to output clipped values + * @param[in] low lower bound + * @param[in] high higher bound + * @param[in] num_samples number of samples to clip + */ +DSP_FUNC_SCOPE void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low, + float32_t high, uint32_t num_samples); + +/** + * @brief Elementwise fixed-point clipping + * @param[in] src points to input values + * @param[out] dst points to output clipped values + * @param[in] low lower bound + * @param[in] high higher bound + * @param[in] num_samples number of samples to clip + */ +DSP_FUNC_SCOPE void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high, + uint32_t num_samples); + +/** + * @brief Elementwise fixed-point clipping + * @param[in] src points to input values + * @param[out] dst points to output clipped values + * @param[in] low lower bound + * @param[in] high higher bound + * @param[in] num_samples number of samples to clip + */ +DSP_FUNC_SCOPE void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high, + uint32_t num_samples); + +/** + * @brief Elementwise fixed-point clipping + * @param[in] src points to input values + * @param[out] dst points to output clipped values + * @param[in] low lower bound + * @param[in] high higher bound + * 
@param[in] num_samples number of samples to clip + */ +DSP_FUNC_SCOPE void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high, + uint32_t num_samples); + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#ifdef CONFIG_FP16 +#include <zephyr/dsp/basicmath_f16.h> +#endif /* CONFIG_FP16 */ + +#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_H_ */ diff --git a/include/zephyr/dsp/basicmath_f16.h b/include/zephyr/dsp/basicmath_f16.h new file mode 100644 index 0000000000..23239e9a65 --- /dev/null +++ b/include/zephyr/dsp/basicmath_f16.h @@ -0,0 +1,124 @@ +/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file zephyr/dsp/basicmath_f16.h + * + * @brief Public APIs for DSP basicmath for 16 bit floating point + */ + +#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_ +#define INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_ + +#ifndef CONFIG_FP16 +#error "Cannot use float16 DSP functionality without CONFIG_FP16 enabled" +#endif /* CONFIG_FP16 */ + +#include <zephyr/dsp/dsp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @ingroup math_dsp_basic_mult + * @brief Floating-point vector multiplication. + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size); + +/** + * @ingroup math_dsp_basic_add + * @brief Floating-point vector addition. 
+ * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size); + +/** + * @ingroup math_dsp_basic_sub + * @brief Floating-point vector subtraction. 
+ * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size); + +/** + * @ingroup math_dsp_basic_scale + * @brief Multiplies a floating-point vector by a scalar. + * @param[in] src points to the input vector + * @param[in] scale scale factor to be applied + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst, + uint32_t block_size); + +/** + * @ingroup math_dsp_basic_abs + * @brief Floating-point vector absolute value. + * @param[in] src points to the input buffer + * @param[out] dst points to the output buffer + * @param[in] block_size number of samples in each vector + */ +DSP_FUNC_SCOPE void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size); + +/** + * @ingroup math_dsp_basic_dot + * @brief Dot product of floating-point vectors. + * @param[in] src_a points to the first input vector + * @param[in] src_b points to the second input vector + * @param[in] block_size number of samples in each vector + * @param[out] result output result returned here + */ +DSP_FUNC_SCOPE void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b, + uint32_t block_size, float16_t *result); + +/** + * @ingroup math_dsp_basic_offset + * @brief Adds a constant offset to a floating-point vector. 
+ * @param[in] src points to the input vector + * @param[in] offset is the offset to be added + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst, + uint32_t block_size); + +/** + * @ingroup math_dsp_basic_negate + * @brief Negates the elements of a floating-point vector. + * @param[in] src points to the input vector + * @param[out] dst points to the output vector + * @param[in] block_size number of samples in the vector + */ +DSP_FUNC_SCOPE void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size); + +/** + * @ingroup math_dsp_basic_clip + * @brief Elementwise floating-point clipping + * @param[in] src points to input values + * @param[out] dst points to output clipped values + * @param[in] low lower bound + * @param[in] high higher bound + * @param[in] num_samples number of samples to clip + */ +DSP_FUNC_SCOPE void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low, + float16_t high, uint32_t num_samples); + +#ifdef __cplusplus +} +#endif + +#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_ */ diff --git a/include/zephyr/dsp/dsp.h b/include/zephyr/dsp/dsp.h new file mode 100644 index 0000000000..09c757216b --- /dev/null +++ b/include/zephyr/dsp/dsp.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2022 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file zephyr/dsp/dsp.h + * + * @brief Public APIs for Digital Signal Processing (DSP) math. 
+ */ + +#ifndef INCLUDE_ZEPHYR_DSP_DSP_H_ +#define INCLUDE_ZEPHYR_DSP_DSP_H_ + +#ifdef CONFIG_DSP_BACKEND_HAS_STATIC +#define DSP_FUNC_SCOPE static +#else +#define DSP_FUNC_SCOPE +#endif + +/** + * @brief DSP Interface + * @defgroup math_dsp DSP Interface + */ + +#include <zephyr/dsp/types.h> + +#include <zephyr/dsp/basicmath.h> + +#include "zdsp_backend.h" + +#endif /* INCLUDE_ZEPHYR_DSP_DSP_H_ */ diff --git a/include/zephyr/dsp/types.h b/include/zephyr/dsp/types.h new file mode 100644 index 0000000000..52ce2ab203 --- /dev/null +++ b/include/zephyr/dsp/types.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2022 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef INCLUDE_ZEPHYR_DSP_TYPES_H_ +#define INCLUDE_ZEPHYR_DSP_TYPES_H_ + +#include <stdint.h> + +/** + * @addtogroup math_dsp + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @typedef q7_t + * @brief 8-bit fractional data type in 1.7 format. + */ +typedef int8_t q7_t; + +/** + * @typedef q15_t + * @brief 16-bit fractional data type in 1.15 format. + */ +typedef int16_t q15_t; + +/** + * @typedef q31_t + * @brief 32-bit fractional data type in 1.31 format. + */ +typedef int32_t q31_t; + +/** + * @typedef q63_t + * @brief 64-bit fractional data type in 1.63 format. + */ +typedef int64_t q63_t; + +/** + * @typedef float16_t + * @brief 16-bit floating point type definition. + */ +#if defined(CONFIG_FP16) +typedef __fp16 float16_t; +#endif /* CONFIG_FP16 */ + +/** + * @typedef float32_t + * @brief 32-bit floating-point type definition. + */ +typedef float float32_t; + +/** + * @typedef float64_t + * @brief 64-bit floating-point type definition. 
+ */ +typedef double float64_t; + +#ifdef __cplusplus +} +#endif + +/** + * @} + */ + +#endif /* INCLUDE_ZEPHYR_DSP_TYPES_H_ */ diff --git a/modules/Kconfig.cmsis b/modules/Kconfig.cmsis index 19ab657e66..c14870c142 100644 --- a/modules/Kconfig.cmsis +++ b/modules/Kconfig.cmsis @@ -22,7 +22,7 @@ endif menuconfig CMSIS_DSP bool "CMSIS-DSP Library Support" - depends on (CPU_CORTEX && NEWLIB_LIBC) || ARCH_POSIX + depends on NEWLIB_LIBC || ARCH_POSIX if CMSIS_DSP source "modules/Kconfig.cmsis_dsp" diff --git a/modules/Kconfig.cmsis_dsp b/modules/Kconfig.cmsis_dsp index 86704b392a..6b5f936ccc 100644 --- a/modules/Kconfig.cmsis_dsp +++ b/modules/Kconfig.cmsis_dsp @@ -29,6 +29,7 @@ config CMSIS_DSP_BASICMATH config CMSIS_DSP_COMPLEXMATH bool "Complex Math Functions" imply CMSIS_DSP_FASTMATH + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Complex Math Functions, which support the following operations: @@ -42,6 +43,7 @@ config CMSIS_DSP_COMPLEXMATH config CMSIS_DSP_CONTROLLER bool "Controller Functions" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Controller Functions, which support the following operations: @@ -61,6 +63,7 @@ config CMSIS_DSP_FASTMATH bool "Fast Math Functions" select CMSIS_DSP_TABLES imply CMSIS_DSP_TABLES_ALL_FAST + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Fast Math Functions, which support the following operations: @@ -74,6 +77,7 @@ config CMSIS_DSP_FILTERING bool "Filtering Functions" imply CMSIS_DSP_FASTMATH imply CMSIS_DSP_SUPPORT + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Filtering Functions, which support the following operations: @@ -99,6 +103,7 @@ config CMSIS_DSP_FILTERING config CMSIS_DSP_INTERPOLATION bool "Interpolation Functions" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Interpolation Functions, which support the following operations: @@ -109,6 +114,7 @@ config CMSIS_DSP_INTERPOLATION config CMSIS_DSP_MATRIX bool "Matrix 
Functions" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Matrix Functions, which support the following operations: @@ -127,6 +133,7 @@ config CMSIS_DSP_MATRIX config CMSIS_DSP_QUATERNIONMATH bool "Quaternion Math Functions" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Quaternion Math Functions, which support the following operations: @@ -142,6 +149,7 @@ config CMSIS_DSP_STATISTICS bool "Statistics Functions" imply CMSIS_DSP_BASICMATH imply CMSIS_DSP_FASTMATH + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Statistics Functions, which support the following operations: @@ -161,6 +169,7 @@ config CMSIS_DSP_STATISTICS config CMSIS_DSP_SUPPORT bool "Support Functions" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Support Functions, which support the following operations: @@ -180,6 +189,7 @@ config CMSIS_DSP_TRANSFORM bool "Transform Functions" select CMSIS_DSP_TABLES imply CMSIS_DSP_TABLES_ALL_FFT + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Transform Functions, which support the following transformations: @@ -191,6 +201,7 @@ config CMSIS_DSP_TRANSFORM config CMSIS_DSP_SVM bool "Support Vector Machine Functions" select CMSIS_DSP_TABLES + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Support Vector Machine Functions, which support the following algorithms: @@ -203,6 +214,7 @@ config CMSIS_DSP_SVM config CMSIS_DSP_BAYES bool "Bayesian Estimators" imply CMSIS_DSP_STATISTICS + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Bayesian Estimator Functions, which implements the naive gaussian Bayes estimator. 
@@ -210,6 +222,7 @@ config CMSIS_DSP_BAYES config CMSIS_DSP_DISTANCE bool "Distance Functions" imply CMSIS_DSP_STATISTICS + depends on CPU_CORTEX || ARCH_POSIX help This option enables the Distance Functions, which support the following distance computation algorithms: @@ -238,6 +251,7 @@ config CMSIS_DSP_DISTANCE menuconfig CMSIS_DSP_TABLES bool "Look-up Tables" + depends on CPU_CORTEX || ARCH_POSIX help This option enables the static look-up tables used by the DSP functions to compute results. diff --git a/subsys/CMakeLists.txt b/subsys/CMakeLists.txt index a7f385cc54..44d1f82eab 100644 --- a/subsys/CMakeLists.txt +++ b/subsys/CMakeLists.txt @@ -19,6 +19,7 @@ add_subdirectory(random) add_subdirectory(storage) add_subdirectory_ifdef(CONFIG_SETTINGS settings) add_subdirectory(fb) +add_subdirectory(dsp) add_subdirectory(portability) add_subdirectory(pm) add_subdirectory(stats) diff --git a/subsys/Kconfig b/subsys/Kconfig index 66d69d3414..2beb1e91d4 100644 --- a/subsys/Kconfig +++ b/subsys/Kconfig @@ -32,6 +32,8 @@ source "subsys/logging/Kconfig" source "subsys/lorawan/Kconfig" +source "subsys/dsp/Kconfig" + source "subsys/mgmt/Kconfig" source "subsys/modbus/Kconfig" diff --git a/subsys/dsp/CMakeLists.txt b/subsys/dsp/CMakeLists.txt new file mode 100644 index 0000000000..a8ebd0ca92 --- /dev/null +++ b/subsys/dsp/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright (c) 2022 Google LLC +# SPDX-License-Identifier: Apache-2.0 + +zephyr_library_named(zdsp) + +add_subdirectory_ifdef(CONFIG_DSP_BACKEND_CMSIS cmsis) + +zephyr_link_libraries(zdsp) diff --git a/subsys/dsp/Kconfig b/subsys/dsp/Kconfig new file mode 100644 index 0000000000..e053d9f6a3 --- /dev/null +++ b/subsys/dsp/Kconfig @@ -0,0 +1,26 @@ +# Copyright (c) 2022 Google LLC +# SPDX-License-Identifier: Apache-2.0 + +config DSP_BACKEND_HAS_STATIC + bool + +choice DSP_BACKEND + prompt "DSP library backend selection" + default DSP_BACKEND_CMSIS if CMSIS_DSP + default DSP_BACKEND_CUSTOM + +config DSP_BACKEND_CMSIS + bool "Use 
the CMSIS-DSP library as the math backend" + depends on CMSIS_DSP + select DSP_BACKEND_HAS_STATIC + help + Implement the various zephyr DSP functions using the CMSIS-DSP library. This feature + requires the CMSIS module to be selected. + +config DSP_BACKEND_CUSTOM + bool "Do not use any Zephyr backends for DSP" + help + Rely on the application to provide a custom DSP backend. The implementation should be + added to the 'zdsp' build target by the application or one of its modules. + +endchoice diff --git a/subsys/dsp/cmsis/CMakeLists.txt b/subsys/dsp/cmsis/CMakeLists.txt new file mode 100644 index 0000000000..c2b1bc0e6f --- /dev/null +++ b/subsys/dsp/cmsis/CMakeLists.txt @@ -0,0 +1,4 @@ +# Copyright (c) 2022 Google LLC +# SPDX-License-Identifier: Apache-2.0 + +target_include_directories(zdsp PUBLIC public) diff --git a/subsys/dsp/cmsis/public/zdsp_backend.h b/subsys/dsp/cmsis/public/zdsp_backend.h new file mode 100644 index 0000000000..39a9d6d4ab --- /dev/null +++ b/subsys/dsp/cmsis/public/zdsp_backend.h @@ -0,0 +1,280 @@ +/* Copyright (c) 2022 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_ +#define SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* This include MUST be done before arm_math.h so we can let the arch specific + * logic set up the right #define values for arm_math.h + */ +#include + +#include + +static inline void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, + uint32_t block_size) +{ + arm_mult_q7(src_a, src_b, dst, block_size); +} +static inline void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size) +{ + arm_mult_q15(src_a, src_b, dst, block_size); +} +static inline void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size) +{ + arm_mult_q31(src_a, src_b, dst, block_size); +} +static inline void zdsp_mult_f32(const float32_t *src_a, const 
float32_t *src_b, float32_t *dst, + uint32_t block_size) +{ + arm_mult_f32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size) +{ + arm_add_q7(src_a, src_b, dst, block_size); +} +static inline void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size) +{ + arm_add_q15(src_a, src_b, dst, block_size); +} +static inline void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size) +{ + arm_add_q31(src_a, src_b, dst, block_size); +} +static inline void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst, + uint32_t block_size) +{ + arm_add_f32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size) +{ + arm_sub_q7(src_a, src_b, dst, block_size); +} +static inline void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst, + uint32_t block_size) +{ + arm_sub_q15(src_a, src_b, dst, block_size); +} +static inline void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst, + uint32_t block_size) +{ + arm_sub_q31(src_a, src_b, dst, block_size); +} +static inline void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst, + uint32_t block_size) +{ + arm_sub_f32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst, + uint32_t block_size) +{ + arm_scale_q7(src, scale_fract, shift, dst, block_size); +} +static inline void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst, + uint32_t block_size) +{ + arm_scale_q15(src, scale_fract, shift, dst, block_size); +} +static inline void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst, + uint32_t block_size) +{ + arm_scale_q31(src, scale_fract, shift, dst, block_size); +} + +static inline void 
zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, + uint32_t block_size) +{ + arm_scale_f32(src, scale, dst, block_size); +} + +static inline void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size) +{ + arm_abs_q7(src, dst, block_size); +} +static inline void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size) +{ + arm_abs_q15(src, dst, block_size); +} +static inline void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size) +{ + arm_abs_q31(src, dst, block_size); +} +static inline void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size) +{ + arm_abs_f32(src, dst, block_size); +} + +static inline void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size) +{ + arm_negate_q7(src, dst, block_size); +} +static inline void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size) +{ + arm_negate_q15(src, dst, block_size); +} +static inline void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size) +{ + arm_negate_q31(src, dst, block_size); +} +static inline void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size) +{ + arm_negate_f32(src, dst, block_size); +} + +static inline void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size, + q31_t *dst) +{ + arm_dot_prod_q7(src_a, src_b, block_size, dst); +} +static inline void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size, + q63_t *dst) +{ + arm_dot_prod_q15(src_a, src_b, block_size, dst); +} +static inline void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size, + q63_t *dst) +{ + arm_dot_prod_q31(src_a, src_b, block_size, dst); +} +static inline void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b, + uint32_t block_size, float32_t *dst) +{ + arm_dot_prod_f32(src_a, src_b, block_size, dst); +} + +static inline void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst, uint32_t block_size) +{ + 
arm_shift_q7(src, shift_bits, dst, block_size); +} +static inline void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst, + uint32_t block_size) +{ + arm_shift_q15(src, shift_bits, dst, block_size); +} +static inline void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst, + uint32_t block_size) +{ + arm_shift_q31(src, shift_bits, dst, block_size); +} + +static inline void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size) +{ + arm_offset_q7(src, offset, dst, block_size); +} +static inline void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst, uint32_t block_size) +{ + arm_offset_q15(src, offset, dst, block_size); +} +static inline void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst, uint32_t block_size) +{ + arm_offset_q31(src, offset, dst, block_size); +} +static inline void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst, + uint32_t block_size) +{ + arm_offset_f32(src, offset, dst, block_size); +} + +static inline void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high, + uint32_t num_samples) +{ + arm_clip_q7(src, dst, low, high, num_samples); +} +static inline void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high, + uint32_t num_samples) +{ + arm_clip_q15(src, dst, low, high, num_samples); +} +static inline void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high, + uint32_t num_samples) +{ + arm_clip_q31(src, dst, low, high, num_samples); +} +static inline void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low, + float32_t high, uint32_t num_samples) +{ + arm_clip_f32(src, dst, low, high, num_samples); +} + +static inline void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size) +{ + arm_and_u8(src_a, src_b, dst, block_size); +} +static inline void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size) +{ + arm_and_u16(src_a, 
 src_b, dst, block_size); +} +static inline void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size) +{ + arm_and_u32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size) +{ + arm_or_u8(src_a, src_b, dst, block_size); +} +static inline void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size) +{ + arm_or_u16(src_a, src_b, dst, block_size); +} +static inline void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size) +{ + arm_or_u32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst, + uint32_t block_size) +{ + arm_xor_u8(src_a, src_b, dst, block_size); +} +static inline void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst, + uint32_t block_size) +{ + arm_xor_u16(src_a, src_b, dst, block_size); +} +static inline void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst, + uint32_t block_size) +{ + arm_xor_u32(src_a, src_b, dst, block_size); +} + +static inline void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size) +{ + arm_not_u8(src, dst, block_size); +} +static inline void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size) +{ + arm_not_u16(src, dst, block_size); +} +static inline void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size) +{ + arm_not_u32(src, dst, block_size); +} + +#ifdef __cplusplus +} +#endif + +#ifdef CONFIG_FP16 +#include "zdsp_backend_f16.h" +#endif /* CONFIG_FP16 */ + +#endif /* SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_ */ diff --git a/subsys/dsp/cmsis/public/zdsp_backend_f16.h b/subsys/dsp/cmsis/public/zdsp_backend_f16.h new file mode 100644 index 0000000000..1bc6364edd --- /dev/null +++ b/subsys/dsp/cmsis/public/zdsp_backend_f16.h @@ -0,0 
+1,75 @@ +/* Copyright (c) 2022 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_ +#define SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* This include MUST be done before arm_math.h so we can let the arch specific + * logic set up the right #define values for arm_math.h + */ +#include + +#include + +static inline void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size) +{ + arm_mult_f16(src_a, src_b, dst, block_size); +} + +static inline void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size) +{ + arm_add_f16(src_a, src_b, dst, block_size); +} + +static inline void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst, + uint32_t block_size) +{ + arm_sub_f16(src_a, src_b, dst, block_size); +} + +static inline void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst, + uint32_t block_size) +{ + arm_scale_f16(src, scale, dst, block_size); +} + +static inline void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size) +{ + arm_abs_f16(src, dst, block_size); +} + +static inline void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b, + uint32_t block_size, float16_t *result) +{ + arm_dot_prod_f16(src_a, src_b, block_size, result); +} + +static inline void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst, + uint32_t block_size) +{ + arm_offset_f16(src, offset, dst, block_size); +} + +static inline void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size) +{ + arm_negate_f16(src, dst, block_size); +} + +static inline void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low, + float16_t high, uint32_t num_samples) +{ + arm_clip_f16(src, dst, low, high, num_samples); +} + +#ifdef __cplusplus +} +#endif + +#endif /* 
SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_ */ diff --git a/west.yml b/west.yml index 3473a1b5de..cc3f462b77 100644 --- a/west.yml +++ b/west.yml @@ -32,7 +32,7 @@ manifest: revision: fe0ab36e0fa7453a4c9b97bedac89709f45cf965 path: modules/lib/chre - name: cmsis - revision: 093de61c2a7d12dc9253daf8692f61f793a9254a + revision: 74981bf893e8b10931464b9945e2143d99a3f0a3 path: modules/hal/cmsis groups: - hal