math: Introduce a DSP basicmath subsystem with a cmsis backend

Introduce an API mirroring the CMSIS-DSP's basicmath. If CMSIS_DSP is enabled, then it will by default be used as a backend. Developers may opt into a custom backend by setting CONFIG_DSP_BACKEND_CMSIS=n. If done, the application must provide `zdsp_backend/dsp.h` and optionally implement the functions in its own .c files. Signed-off-by: Yuval Peress <peress@google.com>
2022-10-06 23:54:46 -06:00 · 2022-10-06 23:54:46 -06:00 · b38445eaa0
parent 0433965982
commit b38445eaa0
20 changed files with 1637 additions and 3 deletions
--- a/1
+++ b/1
@ -756,6 +756,7 @@ scripts/build/gen_image_info.py           @tejlmand
 /subsys/debug/gdbstub.c                   @ceolin
 /subsys/dfu/                              @de-nordic @nordicjm
 /subsys/disk/                             @jfischer-no
+/subsys/dsp/                              @yperess
 /subsys/tracing/                          @nashif
 /subsys/debug/asan_hacks.c                @aescolar @daor-oti
 /subsys/demand_paging/                    @dcpleung @nashif
--- a/MAINTAINERS.yml
+++ b/MAINTAINERS.yml
@ -324,6 +324,17 @@ CMSIS API layer:
    - "area: CMSIS API Layer"
    - "area: Portability"

+DSP subsystem:
+  status: maintained
+  maintainers:
+    - stephanosio
+    - yperess
+  files:
+    - subsys/dsp/
+    - tests/subsys/dsp/
+  labels:
+    - "area: DSP"
+
 CMSIS-DSP integration:
  status: maintained
  maintainers:
@ -334,7 +345,6 @@ CMSIS-DSP integration:
    - modules/Kconfig.cmsis_dsp
    - tests/benchmarks/cmsis_dsp/
    - tests/lib/cmsis_dsp/
-    - tests/subsys/dsp/
  labels:
    - "area: CMSIS-DSP"

--- a/doc/develop/api/overview.rst
+++ b/doc/develop/api/overview.rst
@ -316,3 +316,7 @@ between major releases are available in the :ref:`zephyr_release_notes`.
   * - :ref:`watchdog_api`
     - Stable
     - 1.0
+
+   * - :ref:`zdsp_api`
+     - Experimental
+     - 3.3
--- a/doc/services/dsp/index.rst
+++ b/doc/services/dsp/index.rst
@ -0,0 +1,61 @@
+.. _zdsp_api:
+
+Digital Signal Processing (DSP)
+###############################
+
+.. contents::
+    :local:
+    :depth: 2
+
+The DSP API provides an architecture-agnostic way to perform signal processing.
+Currently, the API will work on any architecture but will likely not be
+optimized. The status of the various architectures can be found below:
+
+--------------+-------------+
+| Architecture | Status      |
+--------------+-------------+
+| ARC          | Unoptimized |
+| ARM          | Optimized   |
+| ARM64        | Optimized   |
+| MIPS         | Unoptimized |
+| NIOS2        | Unoptimized |
+| POSIX        | Unoptimized |
+| RISCV        | Unoptimized |
+| RISCV64      | Unoptimized |
+| SPARC        | Unoptimized |
+| X86          | Unoptimized |
+| XTENSA       | Unoptimized |
+--------------+-------------+
+
+Using zDSP
+**********
+
+zDSP provides various backend options which are selected automatically for the
+application. By default, including the CMSIS module will enable all
+architectures to use the zDSP APIs. This can be done by setting::
+
+	CONFIG_CMSIS_DSP=y
+
+If your application requires some additional customization, it's possible to
+enable :kconfig:option:`CONFIG_DSP_BACKEND_CUSTOM` which means that the
+application is responsible for providing the implementation of the zDSP
+library.
+
+Optimizing for your architecture
+********************************
+
+If your architecture is showing as ``Unoptimized``, it's possible to add a new
+zDSP backend to better support it. To do that, a new Kconfig option should be
+added to `subsys/dsp/Kconfig`_ along with the required dependencies and the
+``default`` set for the ``DSP_BACKEND`` Kconfig choice.
+
+Next, the implementation should be added at ``subsys/dsp/<backend>/`` and
+linked in at `subsys/dsp/CMakeLists.txt`_.
+
+API Reference
+*************
+
+.. doxygengroup:: math_dsp
+
+.. _subsys/dsp/Kconfig: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/Kconfig
+.. _subsys/dsp/CMakeLists.txt: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/CMakeLists.txt
--- a/doc/services/index.rst
+++ b/doc/services/index.rst
@ -10,6 +10,7 @@ OS Services
   crypto/index
   debugging/index.rst
   device_mgmt/index
+   dsp/index.rst
   file_system/index.rst
   formatted_output.rst
   ipc/index.rst
--- a/doc/zephyr.doxyfile.in
+++ b/doc/zephyr.doxyfile.in
@ -2316,6 +2316,7 @@ PREDEFINED             = __DOXYGEN__ \
                         CONFIG_ERRNO \
                         CONFIG_FLASH_JESD216_API \
                         CONFIG_FLASH_PAGE_LAYOUT \
+                         CONFIG_FP16 \
                         CONFIG_FPU \
                         CONFIG_FPU_SHARING \
                         CONFIG_GDBSTUB \
--- a/include/zephyr/dsp/basicmath.h
+++ b/include/zephyr/dsp/basicmath.h
@ -0,0 +1,920 @@
+/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/basicmath.h
+ *
+ * @brief Public APIs for DSP basicmath
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_H_
+#define INCLUDE_ZEPHYR_DSP_BASICMATH_H_
+
+#include <zephyr/dsp/dsp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @ingroup math_dsp
+ * @defgroup math_dsp_basic Basic Math Functions
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_mult Vector Multiplication
+ *
+ * Element-by-element multiplication of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] * src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Q7 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q15 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Q31 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Floating-point vector multiplication.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_add Vector Addition
+ *
+ * Element-by-element addition of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] + src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector addition.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q7 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief Q15 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q31 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_sub Vector Subtraction
+ *
+ * Element-by-element subtraction of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] - src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector subtraction.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q7 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief Q15 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q31 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_scale Vector Scale
+ *
+ * Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
+ * <pre>
+ *     dst[n] = src[n] * scale,   0 <= n < block_size.
+ * </pre>
+ *
+ * In the fixed-point Q7, Q15, and Q31 functions, scale is represented by a fractional
+ * multiplication <code>scale_fract</code> and an arithmetic shift <code>shift</code>. The shift
+ * allows the gain of the scaling operation to exceed 1.0. The algorithm used with fixed-point data
+ * is:
+ * <pre>
+ *     dst[n] = (src[n] * scale_fract) << shift,   0 <= n < block_size.
+ * </pre>
+ *
+ * The overall scale factor applied to the fixed-point data is
+ * <pre>
+ *     scale = scale_fract * 2^shift.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer.
+ * @{
+ */
+
+/**
+ * @brief Multiplies a floating-point vector by a scalar.
+ * @param[in]  src        points to the input vector
+ * @param[in]  scale      scale factor to be applied
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q7 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.7 format.
+ *   These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to
+ *   1.7 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q15 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.15 format.
+ *   These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to
+ *   1.15 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q31 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.31 format.
+ *   These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to
+ *   1.31 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_abs Vector Absolute Value
+ *
+ * Computes the absolute value of a vector on an element-by-element basis.
+ * <pre>
+ *     dst[n] = abs(src[n]),   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector absolute value.
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q7 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q15 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q31 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value
+ *   0x7FFFFFFF.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_dot Vector Dot Product
+ *
+ * Computes the dot product of two vectors. The vectors are multiplied element-by-element and then
+ * summed.
+ * <pre>
+ *     sum = src_a[0]*src_b[0] + src_a[1]*src_b[1] + ... + src_a[block_size-1]*src_b[block_size-1]
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Dot product of floating-point vectors.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b,
+				      uint32_t block_size, float32_t *result);
+
+/**
+ * @brief Dot product of Q7 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these results are added to
+ *   an accumulator in 18.14 format. Nonsaturating additions are used and there is no danger of wrap
+ *   around as long as the vectors are less than 2^18 elements long. The return result is in 18.14
+ *   format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size,
+				     q31_t *result);
+
+/**
+ * @brief Dot product of Q15 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these results are added
+ *   to a 64-bit accumulator in 34.30 format. Nonsaturating additions are used and given that there
+ *   are 33 guard bits in the accumulator there is no risk of overflow. The return result is in
+ *   34.30 format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size,
+				      q63_t *result);
+
+/**
+ * @brief Dot product of Q31 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these are truncated to
+ *   2.48 format by discarding the lower 14 bits. The 2.48 result is then added without saturation
+ *   to a 64-bit accumulator in 16.48 format. There are 15 guard bits in the accumulator and there
+ *   is no risk of overflow as long as the length of the vectors is less than 2^16 elements. The
+ *   return result is in 16.48 format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size,
+				      q63_t *result);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_shift Vector Shift
+ *
+ * Shifts the elements of a fixed-point vector by a specified number of bits.
+ * There are separate functions for Q7, Q15, and Q31 data types. The underlying algorithm used is:
+ * <pre>
+ *     dst[n] = src[n] << shift,   0 <= n < block_size.
+ * </pre>
+ * If <code>shift</code> is positive then the elements of the vector are shifted to the left.
+ * If <code>shift</code> is negative then the elements of the vector are shifted to the right.
+ *
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer.
+ * @{
+ */
+
+/**
+ * @brief  Shifts the elements of a Q7 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief  Shifts the elements of a Q15 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief  Shifts the elements of a Q31 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_offset Vector Offset
+ *
+ * Adds a constant offset to each element of a vector.
+ * <pre>
+ *     dst[n] = src[n] + offset,   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ *
+ * @{
+ */
+
+/**
+ * @brief  Adds a constant offset to a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q7 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q15 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q31 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_negate Vector Negate
+ *
+ * Negates the elements of a vector.
+ * <pre>
+ *     dst[n] = -src[n],   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ *
+ * @{
+ */
+
+/**
+ * @brief  Negates the elements of a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q7 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q15 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q31 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_and Vector bitwise AND
+ *
+ * Compute the logical bitwise AND.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise AND of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise AND of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise AND of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_or Vector bitwise OR
+ *
+ * Compute the logical bitwise OR.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise OR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise OR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise OR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_not Vector bitwise NOT
+ *
+ * Compute the logical bitwise NOT.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise NOT of a fixed-point vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise NOT of a fixed-point vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise NOT of a fixed-point vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_xor Vector bitwise XOR
+ *
+ * Compute the logical bitwise XOR.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise XOR of two fixed-point vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_clip Vector Clipping
+ *
+ * Element-by-element clipping of a value.
+ *
+ * The value is constrained between 2 bounds.
+ *
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief         Elementwise floating-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low,
+				  float32_t high, uint32_t num_samples);
+
+/**
+ * @brief         Elementwise fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high,
+				  uint32_t num_samples);
+
+/**
+ * @brief         Elementwise fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high,
+				  uint32_t num_samples);
+
+/**
+ * @brief         Elementwise fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high,
+				 uint32_t num_samples);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef CONFIG_FP16
+#include <zephyr/dsp/basicmath_f16.h>
+#endif /* CONFIG_FP16 */
+
+#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_H_ */
--- a/include/zephyr/dsp/basicmath_f16.h
+++ b/include/zephyr/dsp/basicmath_f16.h
@ -0,0 +1,124 @@
+/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/basicmath_f16.h
+ *
+ * @brief Public APIs for DSP basicmath for 16 bit floating point
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_
+#define INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_
+
+#ifndef CONFIG_FP16
+#error "Cannot use float16 DSP functionality without CONFIG_FP16 enabled"
+#endif /* CONFIG_FP16 */
+
+#include <zephyr/dsp/dsp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @ingroup math_dsp_basic_mult
+ * @brief Floating-point vector multiplication.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_add
+ * @brief Floating-point vector addition.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_sub
+ * @brief Floating-point vector subtraction.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_scale
+ * @brief Multiplies a floating-point vector by a scalar.
+ * @param[in]  src        points to the input vector
+ * @param[in]  scale      scale factor to be applied
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_abs
+ * @brief Floating-point vector absolute value.
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_dot
+ * @brief Dot product of floating-point vectors.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b,
+				      uint32_t block_size, float16_t *result);
+
+/**
+ * @ingroup math_dsp_basic_offset
+ * @brief Adds a constant offset to a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_negate
+ * @brief Negates the elements of a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_clip
+ * @brief         Elementwise floating-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low,
+				  float16_t high, uint32_t num_samples);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_ */
--- a/include/zephyr/dsp/dsp.h
+++ b/include/zephyr/dsp/dsp.h
@ -0,0 +1,31 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/dsp.h
+ *
+ * @brief Public APIs for Digital Signal Processing (DSP) math.
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_DSP_H_
+#define INCLUDE_ZEPHYR_DSP_DSP_H_
+
+#ifdef CONFIG_DSP_BACKEND_HAS_STATIC
+#define DSP_FUNC_SCOPE static
+#else
+#define DSP_FUNC_SCOPE
+#endif
+
+/**
+ * @brief DSP Interface
+ * @defgroup math_dsp DSP Interface
+ */
+
+#include <zephyr/dsp/types.h>
+
+#include <zephyr/dsp/basicmath.h>
+
+#include "zdsp_backend.h"
+
+#endif /* INCLUDE_ZEPHYR_DSP_DSP_H_ */
--- a/include/zephyr/dsp/types.h
+++ b/include/zephyr/dsp/types.h
@ -0,0 +1,71 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_TYPES_H_
+#define INCLUDE_ZEPHYR_DSP_TYPES_H_
+
+#include <stdint.h>
+
+/**
+ * @addtogroup math_dsp
+ * @{
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @typedef q7_t
+ * @brief 8-bit fractional data type in 1.7 format.
+ */
+typedef int8_t q7_t;
+
+/**
+ * @typedef q15_t
+ * @brief 16-bit fractional data type in 1.15 format.
+ */
+typedef int16_t q15_t;
+
+/**
+ * @typedef q31_t
+ * @brief 32-bit fractional data type in 1.31 format.
+ */
+typedef int32_t q31_t;
+
+/**
+ * @typedef q63_t
+ * @brief 64-bit fractional data type in 1.63 format.
+ */
+typedef int64_t q63_t;
+
+/**
+ * @typedef float16_t
+ * @brief 16-bit floating point type definition.
+ */
+#if defined(CONFIG_FP16)
+typedef __fp16 float16_t;
+#endif /* CONFIG_FP16 */
+
+/**
+ * @typedef float32_t
+ * @brief 32-bit floating-point type definition.
+ */
+typedef float float32_t;
+
+/**
+ * @typedef float64_t
+ * @brief 64-bit floating-point type definition.
+ */
+typedef double float64_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @}
+ */
+
+#endif /* INCLUDE_ZEPHYR_DSP_TYPES_H_ */
--- a/modules/Kconfig.cmsis
+++ b/modules/Kconfig.cmsis
@ -22,7 +22,7 @@ endif

 menuconfig CMSIS_DSP
 	bool "CMSIS-DSP Library Support"
-	depends on (CPU_CORTEX && NEWLIB_LIBC) || ARCH_POSIX
+	depends on NEWLIB_LIBC || ARCH_POSIX

 if CMSIS_DSP
 source "modules/Kconfig.cmsis_dsp"
--- a/modules/Kconfig.cmsis_dsp
+++ b/modules/Kconfig.cmsis_dsp
@ -29,6 +29,7 @@ config CMSIS_DSP_BASICMATH
 config CMSIS_DSP_COMPLEXMATH
 	bool "Complex Math Functions"
 	imply CMSIS_DSP_FASTMATH
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Complex Math Functions, which support the
 	  following operations:
@ -42,6 +43,7 @@ config CMSIS_DSP_COMPLEXMATH

 config CMSIS_DSP_CONTROLLER
 	bool "Controller Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Controller Functions, which support the
 	  following operations:
@ -61,6 +63,7 @@ config CMSIS_DSP_FASTMATH
 	bool "Fast Math Functions"
 	select CMSIS_DSP_TABLES
 	imply CMSIS_DSP_TABLES_ALL_FAST
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Fast Math Functions, which support the
 	  following operations:
@ -74,6 +77,7 @@ config CMSIS_DSP_FILTERING
 	bool "Filtering Functions"
 	imply CMSIS_DSP_FASTMATH
 	imply CMSIS_DSP_SUPPORT
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Filtering Functions, which support the
 	  following operations:
@ -99,6 +103,7 @@ config CMSIS_DSP_FILTERING

 config CMSIS_DSP_INTERPOLATION
 	bool "Interpolation Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Interpolation Functions, which support the
 	  following operations:
@ -109,6 +114,7 @@ config CMSIS_DSP_INTERPOLATION

 config CMSIS_DSP_MATRIX
 	bool "Matrix Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Matrix Functions, which support the following
 	  operations:
@ -127,6 +133,7 @@ config CMSIS_DSP_MATRIX

 config CMSIS_DSP_QUATERNIONMATH
 	bool "Quaternion Math Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Quaternion Math Functions, which support the
 	  following operations:
@ -142,6 +149,7 @@ config CMSIS_DSP_STATISTICS
 	bool "Statistics Functions"
 	imply CMSIS_DSP_BASICMATH
 	imply CMSIS_DSP_FASTMATH
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Statistics Functions, which support the
 	  following operations:
@ -161,6 +169,7 @@ config CMSIS_DSP_STATISTICS

 config CMSIS_DSP_SUPPORT
 	bool "Support Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Support Functions, which support the
 	  following operations:
@ -180,6 +189,7 @@ config CMSIS_DSP_TRANSFORM
 	bool "Transform Functions"
 	select CMSIS_DSP_TABLES
 	imply CMSIS_DSP_TABLES_ALL_FFT
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Transform Functions, which support the
 	  following transformations:
@ -191,6 +201,7 @@ config CMSIS_DSP_TRANSFORM
 config CMSIS_DSP_SVM
 	bool "Support Vector Machine Functions"
 	select CMSIS_DSP_TABLES
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Support Vector Machine Functions, which
 	  support the following algorithms:
@ -203,6 +214,7 @@ config CMSIS_DSP_SVM
 config CMSIS_DSP_BAYES
 	bool "Bayesian Estimators"
 	imply CMSIS_DSP_STATISTICS
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Bayesian Estimator Functions, which
 	  implements the naive gaussian Bayes estimator.
@ -210,6 +222,7 @@ config CMSIS_DSP_BAYES
 config CMSIS_DSP_DISTANCE
 	bool "Distance Functions"
 	imply CMSIS_DSP_STATISTICS
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Distance Functions, which support the
 	  following distance computation algorithms:
@ -238,6 +251,7 @@ config CMSIS_DSP_DISTANCE

 menuconfig CMSIS_DSP_TABLES
 	bool "Look-up Tables"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the static look-up tables used by the DSP
 	  functions to compute results.
--- a/subsys/CMakeLists.txt
+++ b/subsys/CMakeLists.txt
@ -19,6 +19,7 @@ add_subdirectory(random)
 add_subdirectory(storage)
 add_subdirectory_ifdef(CONFIG_SETTINGS             settings)
 add_subdirectory(fb)
+add_subdirectory(dsp)
 add_subdirectory(portability)
 add_subdirectory(pm)
 add_subdirectory(stats)
--- a/subsys/Kconfig
+++ b/subsys/Kconfig
@ -32,6 +32,8 @@ source "subsys/logging/Kconfig"

 source "subsys/lorawan/Kconfig"

+source "subsys/dsp/Kconfig"
+
 source "subsys/mgmt/Kconfig"

 source "subsys/modbus/Kconfig"
--- a/subsys/dsp/CMakeLists.txt
+++ b/subsys/dsp/CMakeLists.txt
@ -0,0 +1,8 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+zephyr_library_named(zdsp)
+
+add_subdirectory_ifdef(CONFIG_DSP_BACKEND_CMSIS cmsis)
+
+zephyr_link_libraries(zdsp)
--- a/subsys/dsp/Kconfig
+++ b/subsys/dsp/Kconfig
@ -0,0 +1,26 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+config DSP_BACKEND_HAS_STATIC
+	bool
+
+choice DSP_BACKEND
+	prompt "DSP library backend selection"
+	default DSP_BACKEND_CMSIS if CMSIS_DSP
+	default DSP_BACKEND_CUSTOM
+
+config DSP_BACKEND_CMSIS
+	bool "Use the CMSIS-DSP library as the math backend"
+	depends on CMSIS_DSP
+	select DSP_BACKEND_HAS_STATIC
+	help
+	  Implement the various Zephyr DSP functions using the CMSIS-DSP library. This feature
+	  requires CMSIS-DSP library support (CMSIS_DSP) to be enabled.
+
+config DSP_BACKEND_CUSTOM
+	bool "Do not use any Zephyr backends for DSP"
+	help
+	  Rely on the application to provide a custom DSP backend. The implementation should be
+	  added to the 'zdsp' build target by the application or one of its modules.
+
+endchoice
--- a/subsys/dsp/cmsis/CMakeLists.txt
+++ b/subsys/dsp/cmsis/CMakeLists.txt
@ -0,0 +1,4 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+target_include_directories(zdsp PUBLIC public)
--- a/subsys/dsp/cmsis/public/zdsp_backend.h
+++ b/subsys/dsp/cmsis/public/zdsp_backend.h
@ -0,0 +1,280 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_
+#define SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This include MUST be done before arm_math.h so we can let the arch specific
+ * logic set up the right #define values for arm_math.h
+ */
+#include <zephyr/kernel.h>
+
+#include <arm_math.h>
+
+static inline void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size)
+{
+	arm_mult_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size)
+{
+	arm_add_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				uint32_t block_size)
+{
+	arm_add_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				uint32_t block_size)
+{
+	arm_add_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				uint32_t block_size)
+{
+	arm_add_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size)
+{
+	arm_sub_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst,
+				 uint32_t block_size)
+{
+	arm_scale_q7(src, scale_fract, shift, dst, block_size);
+}
+static inline void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_q15(src, scale_fract, shift, dst, block_size);
+}
+static inline void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_q31(src, scale_fract, shift, dst, block_size);
+}
+
+static inline void zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_f32(src, scale, dst, block_size);
+}
+
+static inline void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size)
+{
+	arm_abs_q7(src, dst, block_size);
+}
+static inline void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size)
+{
+	arm_abs_q15(src, dst, block_size);
+}
+static inline void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size)
+{
+	arm_abs_q31(src, dst, block_size);
+}
+static inline void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size)
+{
+	arm_abs_f32(src, dst, block_size);
+}
+
+static inline void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size)
+{
+	arm_negate_q7(src, dst, block_size);
+}
+static inline void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size)
+{
+	arm_negate_q15(src, dst, block_size);
+}
+static inline void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size)
+{
+	arm_negate_q31(src, dst, block_size);
+}
+static inline void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size)
+{
+	arm_negate_f32(src, dst, block_size);
+}
+
+static inline void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size,
+				    q31_t *dst)
+{
+	arm_dot_prod_q7(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size,
+				     q63_t *dst)
+{
+	arm_dot_prod_q15(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size,
+				     q63_t *dst)
+{
+	arm_dot_prod_q31(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b,
+				     uint32_t block_size, float32_t *dst)
+{
+	arm_dot_prod_f32(src_a, src_b, block_size, dst);
+}
+
+static inline void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst, uint32_t block_size)
+{
+	arm_shift_q7(src, shift_bits, dst, block_size);
+}
+static inline void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst,
+				  uint32_t block_size)
+{
+	arm_shift_q15(src, shift_bits, dst, block_size);
+}
+static inline void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst,
+				  uint32_t block_size)
+{
+	arm_shift_q31(src, shift_bits, dst, block_size);
+}
+
+static inline void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size)
+{
+	arm_offset_q7(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst, uint32_t block_size)
+{
+	arm_offset_q15(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst, uint32_t block_size)
+{
+	arm_offset_q31(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst,
+				   uint32_t block_size)
+{
+	arm_offset_f32(src, offset, dst, block_size);
+}
+
+static inline void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high,
+				uint32_t num_samples)
+{
+	arm_clip_q7(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high,
+				 uint32_t num_samples)
+{
+	arm_clip_q15(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high,
+				 uint32_t num_samples)
+{
+	arm_clip_q31(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low,
+				 float32_t high, uint32_t num_samples)
+{
+	arm_clip_f32(src, dst, low, high, num_samples);
+}
+
+static inline void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size)
+{
+	arm_and_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size)
+{
+	arm_and_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size)
+{
+	arm_and_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			      uint32_t block_size)
+{
+	arm_or_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+			       uint32_t block_size)
+{
+	arm_or_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+			       uint32_t block_size)
+{
+	arm_or_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size)
+{
+	arm_xor_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size)
+{
+	arm_xor_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size)
+{
+	arm_xor_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size)
+{
+	arm_not_u8(src, dst, block_size);
+}
+static inline void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size)
+{
+	arm_not_u16(src, dst, block_size);
+}
+static inline void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size)
+{
+	arm_not_u32(src, dst, block_size);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef CONFIG_FP16
+#include "zdsp_backend_f16.h"
+#endif /* CONFIG_FP16 */
+
+#endif /* SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_ */
--- a/subsys/dsp/cmsis/public/zdsp_backend_f16.h
+++ b/subsys/dsp/cmsis/public/zdsp_backend_f16.h
@ -0,0 +1,75 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_
+#define SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This include MUST be done before arm_math.h so we can let the arch specific
+ * logic set up the right #define values for arm_math.h
+ */
+#include <zephyr/kernel.h>
+
+#include <arm_math_f16.h>
+
+static inline void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_f16(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				uint32_t block_size)
+{
+	arm_add_f16(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_f16(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_f16(src, scale, dst, block_size);
+}
+
+static inline void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size)
+{
+	arm_abs_f16(src, dst, block_size);
+}
+
+static inline void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b,
+				     uint32_t block_size, float16_t *result)
+{
+	arm_dot_prod_f16(src_a, src_b, block_size, result);
+}
+
+static inline void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst,
+				   uint32_t block_size)
+{
+	arm_offset_f16(src, offset, dst, block_size);
+}
+
+static inline void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size)
+{
+	arm_negate_f16(src, dst, block_size);
+}
+
+static inline void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low,
+				 float16_t high, uint32_t num_samples)
+{
+	arm_clip_f16(src, dst, low, high, num_samples);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_ */
--- a/west.yml
+++ b/west.yml
@ -32,7 +32,7 @@ manifest:
      revision: fe0ab36e0fa7453a4c9b97bedac89709f45cf965
      path: modules/lib/chre
    - name: cmsis
-      revision: 093de61c2a7d12dc9253daf8692f61f793a9254a
+      revision: 74981bf893e8b10931464b9945e2143d99a3f0a3
      path: modules/hal/cmsis
      groups:
        - hal