From b38445eaa03bf8367674b7fd0a4e9445513868e6 Mon Sep 17 00:00:00 2001
From: Yuval Peress <peress@google.com>
Date: Thu, 6 Oct 2022 23:54:46 -0600
Subject: [PATCH] math: Introduce a DSP basicmath subsystem with a cmsis
 backend

Introduce an API mirroring the CMSIS-DSP's basicmath. If CMSIS_DSP is
enabled, then it will by default be used as a backend. Developers may
opt into a custom backend by setting CONFIG_DSP_BACKEND_CMSIS=n. If
done, the application must provide `zdsp_backend/dsp.h` and optionally
implement the functions in its own .c files.

Signed-off-by: Yuval Peress <peress@google.com>
---
 CODEOWNERS                                 |   1 +
 MAINTAINERS.yml                            |  12 +-
 doc/develop/api/overview.rst               |   4 +
 doc/services/dsp/index.rst                 |  61 ++
 doc/services/index.rst                     |   1 +
 doc/zephyr.doxyfile.in                     |   1 +
 include/zephyr/dsp/basicmath.h             | 920 +++++++++++++++++++++
 include/zephyr/dsp/basicmath_f16.h         | 124 +++
 include/zephyr/dsp/dsp.h                   |  31 +
 include/zephyr/dsp/types.h                 |  71 ++
 modules/Kconfig.cmsis                      |   2 +-
 modules/Kconfig.cmsis_dsp                  |  14 +
 subsys/CMakeLists.txt                      |   1 +
 subsys/Kconfig                             |   2 +
 subsys/dsp/CMakeLists.txt                  |   8 +
 subsys/dsp/Kconfig                         |  26 +
 subsys/dsp/cmsis/CMakeLists.txt            |   4 +
 subsys/dsp/cmsis/public/zdsp_backend.h     | 280 +++++++
 subsys/dsp/cmsis/public/zdsp_backend_f16.h |  75 ++
 west.yml                                   |   2 +-
 20 files changed, 1637 insertions(+), 3 deletions(-)
 create mode 100644 doc/services/dsp/index.rst
 create mode 100644 include/zephyr/dsp/basicmath.h
 create mode 100644 include/zephyr/dsp/basicmath_f16.h
 create mode 100644 include/zephyr/dsp/dsp.h
 create mode 100644 include/zephyr/dsp/types.h
 create mode 100644 subsys/dsp/CMakeLists.txt
 create mode 100644 subsys/dsp/Kconfig
 create mode 100644 subsys/dsp/cmsis/CMakeLists.txt
 create mode 100644 subsys/dsp/cmsis/public/zdsp_backend.h
 create mode 100644 subsys/dsp/cmsis/public/zdsp_backend_f16.h

diff --git a/CODEOWNERS b/CODEOWNERS
index c5383bcc13..018bc0d4cd 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -756,6 +756,7 @@ scripts/build/gen_image_info.py           @tejlmand
 /subsys/debug/gdbstub.c                   @ceolin
 /subsys/dfu/                              @de-nordic @nordicjm
 /subsys/disk/                             @jfischer-no
+/subsys/dsp/                              @yperess
 /subsys/tracing/                          @nashif
 /subsys/debug/asan_hacks.c                @aescolar @daor-oti
 /subsys/demand_paging/                    @dcpleung @nashif
diff --git a/MAINTAINERS.yml b/MAINTAINERS.yml
index a70cf9ea50..f66212f550 100644
--- a/MAINTAINERS.yml
+++ b/MAINTAINERS.yml
@@ -324,6 +324,17 @@ CMSIS API layer:
     - "area: CMSIS API Layer"
     - "area: Portability"
 
+DSP subsystem:
+  status: maintained
+  maintainers:
+    - stephanosio
+    - yperess
+  files:
+    - subsys/dsp/
+    - tests/subsys/dsp/
+  labels:
+    - "area: DSP"
+
 CMSIS-DSP integration:
   status: maintained
   maintainers:
@@ -334,7 +345,6 @@ CMSIS-DSP integration:
     - modules/Kconfig.cmsis_dsp
     - tests/benchmarks/cmsis_dsp/
     - tests/lib/cmsis_dsp/
-    - tests/subsys/dsp/
   labels:
     - "area: CMSIS-DSP"
 
diff --git a/doc/develop/api/overview.rst b/doc/develop/api/overview.rst
index a7836c388a..b6f686ee6c 100644
--- a/doc/develop/api/overview.rst
+++ b/doc/develop/api/overview.rst
@@ -316,3 +316,7 @@ between major releases are available in the :ref:`zephyr_release_notes`.
    * - :ref:`watchdog_api`
      - Stable
      - 1.0
+
+   * - :ref:`zdsp_api`
+     - Experimental
+     - 3.3
diff --git a/doc/services/dsp/index.rst b/doc/services/dsp/index.rst
new file mode 100644
index 0000000000..6ce6acf384
--- /dev/null
+++ b/doc/services/dsp/index.rst
@@ -0,0 +1,61 @@
+.. _zdsp_api:
+
+Digital Signal Processing (DSP)
+###############################
+
+.. contents::
+    :local:
+    :depth: 2
+
+The DSP API provides an architecture-agnostic way to perform signal processing.
+Currently, the API works on any architecture, but it is not yet optimized for
+all of them. The status of the various architectures can be found below:
+
+============ ===========
+Architecture Status
+============ ===========
+ARC          Unoptimized
+ARM          Optimized
+ARM64        Optimized
+MIPS         Unoptimized
+NIOS2        Unoptimized
+POSIX        Unoptimized
+RISCV        Unoptimized
+RISCV64      Unoptimized
+SPARC        Unoptimized
+X86          Unoptimized
+XTENSA       Unoptimized
+============ ===========
+
+Using zDSP
+**********
+
+zDSP provides various backend options which are selected automatically for the
+application. By default, including the CMSIS module will enable all
+architectures to use the zDSP APIs. This can be done by setting::
+
+	CONFIG_CMSIS_DSP=y
+
+If your application requires some additional customization, it's possible to
+enable :kconfig:option:`CONFIG_DSP_BACKEND_CUSTOM` which means that the
+application is responsible for providing the implementation of the zDSP
+library.
+
+Optimizing for your architecture
+********************************
+
+If your architecture is showing as ``Unoptimized``, it's possible to add a new
+zDSP backend to better support it. To do that, add a new Kconfig option to
+`subsys/dsp/Kconfig`_ along with its required dependencies, and update the
+``default`` of the ``DSP_BACKEND`` Kconfig choice accordingly.
+
+Next, the implementation should be added at ``subsys/dsp/<backend>/`` and
+linked in at `subsys/dsp/CMakeLists.txt`_.
+
+API Reference
+*************
+
+.. doxygengroup:: math_dsp
+
+.. _subsys/dsp/Kconfig: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/Kconfig
+.. _subsys/dsp/CMakeLists.txt: https://github.com/zephyrproject-rtos/zephyr/blob/main/subsys/dsp/CMakeLists.txt
diff --git a/doc/services/index.rst b/doc/services/index.rst
index 61c1695ac4..675edf935d 100644
--- a/doc/services/index.rst
+++ b/doc/services/index.rst
@@ -10,6 +10,7 @@ OS Services
    crypto/index
    debugging/index.rst
    device_mgmt/index
+   dsp/index.rst
    file_system/index.rst
    formatted_output.rst
    ipc/index.rst
diff --git a/doc/zephyr.doxyfile.in b/doc/zephyr.doxyfile.in
index 79e3c2bf91..600d8ac81b 100644
--- a/doc/zephyr.doxyfile.in
+++ b/doc/zephyr.doxyfile.in
@@ -2316,6 +2316,7 @@ PREDEFINED             = __DOXYGEN__ \
                          CONFIG_ERRNO \
                          CONFIG_FLASH_JESD216_API \
                          CONFIG_FLASH_PAGE_LAYOUT \
+                         CONFIG_FP16 \
                          CONFIG_FPU \
                          CONFIG_FPU_SHARING \
                          CONFIG_GDBSTUB \
diff --git a/include/zephyr/dsp/basicmath.h b/include/zephyr/dsp/basicmath.h
new file mode 100644
index 0000000000..0128b3c727
--- /dev/null
+++ b/include/zephyr/dsp/basicmath.h
@@ -0,0 +1,920 @@
+/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/basicmath.h
+ *
+ * @brief Public APIs for DSP basicmath
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_H_
+#define INCLUDE_ZEPHYR_DSP_BASICMATH_H_
+
+#include <zephyr/dsp/dsp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @ingroup math_dsp
+ * @defgroup math_dsp_basic Basic Math Functions
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_mult Vector Multiplication
+ *
+ * Element-by-element multiplication of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] * src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Q7 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q15 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Q31 vector multiplication.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Floating-point vector multiplication.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_add Vector Addition
+ *
+ * Element-by-element addition of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] + src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector addition.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q7 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief Q15 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q31 vector addition.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_sub Vector Subtraction
+ *
+ * Element-by-element subtraction of two vectors.
+ * <pre>
+ *     dst[n] = src_a[n] - src_b[n],   0 <= n < block_size.
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector subtraction.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q7 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief Q15 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief Q31 vector subtraction.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_scale Vector Scale
+ *
+ * Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
+ * <pre>
+ *     dst[n] = src[n] * scale,   0 <= n < block_size.
+ * </pre>
+ *
+ * In the fixed-point Q7, Q15, and Q31 functions, scale is represented by a fractional
+ * multiplication <code>scale_fract</code> and an arithmetic shift <code>shift</code>. The shift
+ * allows the gain of the scaling operation to exceed 1.0. The algorithm used with fixed-point data
+ * is:
+ * <pre>
+ *     dst[n] = (src[n] * scale_fract) << shift,   0 <= n < block_size.
+ * </pre>
+ *
+ * The overall scale factor applied to the fixed-point data is
+ * <pre>
+ *     scale = scale_fract * 2^shift.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer.
+ * @{
+ */
+
+/**
+ * @brief Multiplies a floating-point vector by a scalar.
+ * @param[in]  src        points to the input vector
+ * @param[in]  scale      scale factor to be applied
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q7 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.7 format.
+ *   These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to
+ *   1.7 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q15 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.15 format.
+ *   These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to
+ *   1.15 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief Multiplies a Q31 vector by a scalar.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The input data <code>*src</code> and <code>scale_fract</code> are in 1.31 format.
+ *   These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to
+ *   1.31 format.
+ *
+ * @param[in]  src         points to the input vector
+ * @param[in]  scale_fract fractional portion of the scale value
+ * @param[in]  shift       number of bits to shift the result by
+ * @param[out] dst         points to the output vector
+ * @param[in]  block_size  number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_abs Vector Absolute Value
+ *
+ * Computes the absolute value of a vector on an element-by-element basis.
+ * <pre>
+ *     dst[n] = abs(src[n]),   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Floating-point vector absolute value.
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q7 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q15 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size);
+
+/**
+ * @brief Q31 vector absolute value.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value
+ *   0x7FFFFFFF.
+ *
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_dot Vector Dot Product
+ *
+ * Computes the dot product of two vectors. The vectors are multiplied element-by-element and then
+ * summed.
+ * <pre>
+ *     sum = src_a[0]*src_b[0] + src_a[1]*src_b[1] + ... + src_a[block_size-1]*src_b[block_size-1]
+ * </pre>
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief Dot product of floating-point vectors.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b,
+				      uint32_t block_size, float32_t *result);
+
+/**
+ * @brief Dot product of Q7 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these results are added to
+ *   an accumulator in 18.14 format. Nonsaturating additions are used and there is no danger of wrap
+ *   around as long as the vectors are less than 2^18 elements long. The return result is in 18.14
+ *   format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size,
+				     q31_t *result);
+
+/**
+ * @brief Dot product of Q15 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these results are added
+ *   to a 64-bit accumulator in 34.30 format. Nonsaturating additions are used and given that there
+ *   are 33 guard bits in the accumulator there is no risk of overflow. The return result is in
+ *   34.30 format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size,
+				      q63_t *result);
+
+/**
+ * @brief Dot product of Q31 vectors.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these are truncated to
+ *   2.48 format by discarding the lower 14 bits. The 2.48 result is then added without saturation
+ *   to a 64-bit accumulator in 16.48 format. There are 15 guard bits in the accumulator and there
+ *   is no risk of overflow as long as the length of the vectors is less than 2^16 elements. The
+ *   return result is in 16.48 format.
+ *
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size,
+				      q63_t *result);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_shift Vector Shift
+ *
+ * Shifts the elements of a fixed-point vector by a specified number of bits.
+ * There are separate functions for Q7, Q15, and Q31 data types. The underlying algorithm used is:
+ * <pre>
+ *     dst[n] = src[n] << shift,   0 <= n < block_size.
+ * </pre>
+ * If <code>shift</code> is positive then the elements of the vector are shifted to the left.
+ * If <code>shift</code> is negative then the elements of the vector are shifted to the right.
+ *
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer.
+ * @{
+ */
+
+/**
+ * @brief  Shifts the elements of a Q7 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @brief  Shifts the elements of a Q15 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @brief  Shifts the elements of a Q31 vector a specified number of bits.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  shift_bits number of bits to shift.  A positive value shifts left; a negative value
+ *                        shifts right.
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_offset Vector Offset
+ *
+ * Adds a constant offset to each element of a vector.
+ * <pre>
+ *     dst[n] = src[n] + offset,   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ *
+ * @{
+ */
+
+/**
+ * @brief  Adds a constant offset to a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q7 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q15 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @brief  Adds a constant offset to a Q31 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_negate Vector Negate
+ *
+ * Negates the elements of a vector.
+ * <pre>
+ *     dst[n] = -src[n],   0 <= n < block_size.
+ * </pre>
+ * The functions support in-place computation allowing the source and destination pointers to
+ * reference the same memory buffer. There are separate functions for floating-point, Q7, Q15, and
+ * Q31 data types.
+ *
+ * @{
+ */
+
+/**
+ * @brief  Negates the elements of a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q7 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q15 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size);
+
+/**
+ * @brief  Negates the elements of a Q31 vector.
+ *
+ * @par Scaling and Overflow Behavior
+ *   The function uses saturating arithmetic.
+ *   The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
+ *
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_and Vector bitwise AND
+ *
+ * Compute the logical bitwise AND.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise AND of two uint8_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise AND of two uint16_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise AND of two uint32_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_or Vector bitwise OR
+ *
+ * Compute the logical bitwise OR.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise OR of two uint8_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise OR of two uint16_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise OR of two uint32_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_not Vector bitwise NOT
+ *
+ * Compute the logical bitwise NOT.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise NOT of a uint8_t vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise NOT of a uint16_t vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise NOT of a uint32_t vector.
+ * @param[in]     src        points to input vector
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_xor Vector bitwise XOR
+ *
+ * Compute the logical bitwise XOR.
+ *
+ * There are separate functions for uint32_t, uint16_t, and uint8_t data types.
+ * @{
+ */
+
+/**
+ * @brief         Compute the logical bitwise XOR of two uint8_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+				uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise XOR of two uint16_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @brief         Compute the logical bitwise XOR of two uint32_t vectors.
+ * @param[in]     src_a      points to input vector A
+ * @param[in]     src_b      points to input vector B
+ * @param[out]    dst        points to output vector
+ * @param[in]     block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @}
+ */
+
+/**
+ * @ingroup math_dsp_basic
+ * @addtogroup math_dsp_basic_clip Vector Clipping
+ *
+ * Element-by-element clipping of a value.
+ *
+ * The value is constrained between 2 bounds.
+ *
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ * @{
+ */
+
+/**
+ * @brief         Elementwise floating-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low,
+				  float32_t high, uint32_t num_samples);
+
+/**
+ * @brief         Elementwise Q31 fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high,
+				  uint32_t num_samples);
+
+/**
+ * @brief         Elementwise Q15 fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high,
+				  uint32_t num_samples);
+
+/**
+ * @brief         Elementwise Q7 fixed-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high,
+				 uint32_t num_samples);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef CONFIG_FP16
+#include <zephyr/dsp/basicmath_f16.h>
+#endif /* CONFIG_FP16 */
+
+#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_H_ */
diff --git a/include/zephyr/dsp/basicmath_f16.h b/include/zephyr/dsp/basicmath_f16.h
new file mode 100644
index 0000000000..23239e9a65
--- /dev/null
+++ b/include/zephyr/dsp/basicmath_f16.h
@@ -0,0 +1,124 @@
+/* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/basicmath_f16.h
+ *
+ * @brief Public APIs for DSP basicmath for 16 bit floating point
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_
+#define INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_
+
+#ifndef CONFIG_FP16
+#error "Cannot use float16 DSP functionality without CONFIG_FP16 enabled"
+#endif /* CONFIG_FP16 */
+
+#include <zephyr/dsp/dsp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @ingroup math_dsp_basic_mult
+ * @brief Floating-point vector multiplication.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				  uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_add
+ * @brief Floating-point vector addition.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_sub
+ * @brief Floating-point vector subtraction.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_scale
+ * @brief Multiplies a floating-point vector by a scalar.
+ * @param[in]  src        points to the input vector
+ * @param[in]  scale      scale factor to be applied
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst,
+				   uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_abs
+ * @brief Floating-point vector absolute value.
+ * @param[in]  src        points to the input buffer
+ * @param[out] dst        points to the output buffer
+ * @param[in]  block_size number of samples in each vector
+ */
+DSP_FUNC_SCOPE void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_dot
+ * @brief Dot product of floating-point vectors.
+ * @param[in]  src_a      points to the first input vector
+ * @param[in]  src_b      points to the second input vector
+ * @param[in]  block_size number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+DSP_FUNC_SCOPE void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b,
+				      uint32_t block_size, float16_t *result);
+
+/**
+ * @ingroup math_dsp_basic_offset
+ * @brief  Adds a constant offset to a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst,
+				    uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_negate
+ * @brief  Negates the elements of a floating-point vector.
+ * @param[in]  src        points to the input vector
+ * @param[out] dst        points to the output vector
+ * @param[in]  block_size number of samples in the vector
+ */
+DSP_FUNC_SCOPE void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size);
+
+/**
+ * @ingroup math_dsp_basic_clip
+ * @brief         Elementwise floating-point clipping
+ * @param[in]     src          points to input values
+ * @param[out]    dst          points to output clipped values
+ * @param[in]     low          lower bound
+ * @param[in]     high         higher bound
+ * @param[in]     num_samples  number of samples to clip
+ */
+DSP_FUNC_SCOPE void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low,
+				  float16_t high, uint32_t num_samples);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* INCLUDE_ZEPHYR_DSP_BASICMATH_F16_H_ */
diff --git a/include/zephyr/dsp/dsp.h b/include/zephyr/dsp/dsp.h
new file mode 100644
index 0000000000..09c757216b
--- /dev/null
+++ b/include/zephyr/dsp/dsp.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * @file zephyr/dsp/dsp.h
+ *
+ * @brief Public APIs for Digital Signal Processing (DSP) math.
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_DSP_H_
+#define INCLUDE_ZEPHYR_DSP_DSP_H_
+
+#ifdef CONFIG_DSP_BACKEND_HAS_STATIC
+#define DSP_FUNC_SCOPE static
+#else
+#define DSP_FUNC_SCOPE
+#endif
+
+/**
+ * @brief DSP Interface
+ * @defgroup math_dsp DSP Interface
+ */
+
+#include <zephyr/dsp/types.h>
+
+#include <zephyr/dsp/basicmath.h>
+
+#include "zdsp_backend.h"
+
+#endif /* INCLUDE_ZEPHYR_DSP_DSP_H_ */
diff --git a/include/zephyr/dsp/types.h b/include/zephyr/dsp/types.h
new file mode 100644
index 0000000000..52ce2ab203
--- /dev/null
+++ b/include/zephyr/dsp/types.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef INCLUDE_ZEPHYR_DSP_TYPES_H_
+#define INCLUDE_ZEPHYR_DSP_TYPES_H_
+
+#include <stdint.h>
+
+/**
+ * @addtogroup math_dsp
+ * @{
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @typedef q7_t
+ * @brief 8-bit fractional data type in 1.7 format.
+ */
+typedef int8_t q7_t;
+
+/**
+ * @typedef q15_t
+ * @brief 16-bit fractional data type in 1.15 format.
+ */
+typedef int16_t q15_t;
+
+/**
+ * @typedef q31_t
+ * @brief 32-bit fractional data type in 1.31 format.
+ */
+typedef int32_t q31_t;
+
+/**
+ * @typedef q63_t
+ * @brief 64-bit fractional data type in 1.63 format.
+ */
+typedef int64_t q63_t;
+
+/**
+ * @typedef float16_t
+ * @brief 16-bit floating point type definition.
+ */
+#if defined(CONFIG_FP16)
+typedef __fp16 float16_t;
+#endif /* CONFIG_FP16 */
+
+/**
+ * @typedef float32_t
+ * @brief 32-bit floating-point type definition.
+ */
+typedef float float32_t;
+
+/**
+ * @typedef float64_t
+ * @brief 64-bit floating-point type definition.
+ */
+typedef double float64_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @}
+ */
+
+#endif /* INCLUDE_ZEPHYR_DSP_TYPES_H_ */
diff --git a/modules/Kconfig.cmsis b/modules/Kconfig.cmsis
index 19ab657e66..c14870c142 100644
--- a/modules/Kconfig.cmsis
+++ b/modules/Kconfig.cmsis
@@ -22,7 +22,7 @@ endif
 
 menuconfig CMSIS_DSP
 	bool "CMSIS-DSP Library Support"
-	depends on (CPU_CORTEX && NEWLIB_LIBC) || ARCH_POSIX
+	depends on NEWLIB_LIBC || ARCH_POSIX
 
 if CMSIS_DSP
 source "modules/Kconfig.cmsis_dsp"
diff --git a/modules/Kconfig.cmsis_dsp b/modules/Kconfig.cmsis_dsp
index 86704b392a..6b5f936ccc 100644
--- a/modules/Kconfig.cmsis_dsp
+++ b/modules/Kconfig.cmsis_dsp
@@ -29,6 +29,7 @@ config CMSIS_DSP_BASICMATH
 config CMSIS_DSP_COMPLEXMATH
 	bool "Complex Math Functions"
 	imply CMSIS_DSP_FASTMATH
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Complex Math Functions, which support the
 	  following operations:
@@ -42,6 +43,7 @@ config CMSIS_DSP_COMPLEXMATH
 
 config CMSIS_DSP_CONTROLLER
 	bool "Controller Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Controller Functions, which support the
 	  following operations:
@@ -61,6 +63,7 @@ config CMSIS_DSP_FASTMATH
 	bool "Fast Math Functions"
 	select CMSIS_DSP_TABLES
 	imply CMSIS_DSP_TABLES_ALL_FAST
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Fast Math Functions, which support the
 	  following operations:
@@ -74,6 +77,7 @@ config CMSIS_DSP_FILTERING
 	bool "Filtering Functions"
 	imply CMSIS_DSP_FASTMATH
 	imply CMSIS_DSP_SUPPORT
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Filtering Functions, which support the
 	  following operations:
@@ -99,6 +103,7 @@ config CMSIS_DSP_FILTERING
 
 config CMSIS_DSP_INTERPOLATION
 	bool "Interpolation Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Interpolation Functions, which support the
 	  following operations:
@@ -109,6 +114,7 @@ config CMSIS_DSP_INTERPOLATION
 
 config CMSIS_DSP_MATRIX
 	bool "Matrix Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Matrix Functions, which support the following
 	  operations:
@@ -127,6 +133,7 @@ config CMSIS_DSP_MATRIX
 
 config CMSIS_DSP_QUATERNIONMATH
 	bool "Quaternion Math Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Quaternion Math Functions, which support the
 	  following operations:
@@ -142,6 +149,7 @@ config CMSIS_DSP_STATISTICS
 	bool "Statistics Functions"
 	imply CMSIS_DSP_BASICMATH
 	imply CMSIS_DSP_FASTMATH
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Statistics Functions, which support the
 	  following operations:
@@ -161,6 +169,7 @@ config CMSIS_DSP_STATISTICS
 
 config CMSIS_DSP_SUPPORT
 	bool "Support Functions"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Support Functions, which support the
 	  following operations:
@@ -180,6 +189,7 @@ config CMSIS_DSP_TRANSFORM
 	bool "Transform Functions"
 	select CMSIS_DSP_TABLES
 	imply CMSIS_DSP_TABLES_ALL_FFT
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Transform Functions, which support the
 	  following transformations:
@@ -191,6 +201,7 @@ config CMSIS_DSP_TRANSFORM
 config CMSIS_DSP_SVM
 	bool "Support Vector Machine Functions"
 	select CMSIS_DSP_TABLES
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Support Vector Machine Functions, which
 	  support the following algorithms:
@@ -203,6 +214,7 @@ config CMSIS_DSP_SVM
 config CMSIS_DSP_BAYES
 	bool "Bayesian Estimators"
 	imply CMSIS_DSP_STATISTICS
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Bayesian Estimator Functions, which
 	  implements the naive gaussian Bayes estimator.
@@ -210,6 +222,7 @@ config CMSIS_DSP_BAYES
 config CMSIS_DSP_DISTANCE
 	bool "Distance Functions"
 	imply CMSIS_DSP_STATISTICS
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the Distance Functions, which support the
 	  following distance computation algorithms:
@@ -238,6 +251,7 @@ config CMSIS_DSP_DISTANCE
 
 menuconfig CMSIS_DSP_TABLES
 	bool "Look-up Tables"
+	depends on CPU_CORTEX || ARCH_POSIX
 	help
 	  This option enables the static look-up tables used by the DSP
 	  functions to compute results.
diff --git a/subsys/CMakeLists.txt b/subsys/CMakeLists.txt
index a7f385cc54..44d1f82eab 100644
--- a/subsys/CMakeLists.txt
+++ b/subsys/CMakeLists.txt
@@ -19,6 +19,7 @@ add_subdirectory(random)
 add_subdirectory(storage)
 add_subdirectory_ifdef(CONFIG_SETTINGS             settings)
 add_subdirectory(fb)
+add_subdirectory(dsp)
 add_subdirectory(portability)
 add_subdirectory(pm)
 add_subdirectory(stats)
diff --git a/subsys/Kconfig b/subsys/Kconfig
index 66d69d3414..2beb1e91d4 100644
--- a/subsys/Kconfig
+++ b/subsys/Kconfig
@@ -32,6 +32,8 @@ source "subsys/logging/Kconfig"
 
 source "subsys/lorawan/Kconfig"
 
+source "subsys/dsp/Kconfig"
+
 source "subsys/mgmt/Kconfig"
 
 source "subsys/modbus/Kconfig"
diff --git a/subsys/dsp/CMakeLists.txt b/subsys/dsp/CMakeLists.txt
new file mode 100644
index 0000000000..a8ebd0ca92
--- /dev/null
+++ b/subsys/dsp/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+zephyr_library_named(zdsp)
+
+add_subdirectory_ifdef(CONFIG_DSP_BACKEND_CMSIS cmsis)
+
+zephyr_link_libraries(zdsp)
diff --git a/subsys/dsp/Kconfig b/subsys/dsp/Kconfig
new file mode 100644
index 0000000000..e053d9f6a3
--- /dev/null
+++ b/subsys/dsp/Kconfig
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+config DSP_BACKEND_HAS_STATIC
+	bool
+
+choice DSP_BACKEND
+	prompt "DSP library backend selection"
+	default DSP_BACKEND_CMSIS if CMSIS_DSP
+	default DSP_BACKEND_CUSTOM
+
+config DSP_BACKEND_CMSIS
+	bool "Use the CMSIS-DSP library as the math backend"
+	depends on CMSIS_DSP
+	select DSP_BACKEND_HAS_STATIC
+	help
+	  Implement the various Zephyr DSP functions using the CMSIS-DSP library. This feature
+	  requires the CMSIS-DSP module (CONFIG_CMSIS_DSP) to be enabled.
+
+config DSP_BACKEND_CUSTOM
+	bool "Do not use any Zephyr backends for DSP"
+	help
+	  Rely on the application to provide a custom DSP backend. The implementation should be
+	  added to the 'zdsp' build target by the application or one of its modules.
+
+endchoice
diff --git a/subsys/dsp/cmsis/CMakeLists.txt b/subsys/dsp/cmsis/CMakeLists.txt
new file mode 100644
index 0000000000..c2b1bc0e6f
--- /dev/null
+++ b/subsys/dsp/cmsis/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Copyright (c) 2022 Google LLC
+# SPDX-License-Identifier: Apache-2.0
+
+target_include_directories(zdsp PUBLIC public)
diff --git a/subsys/dsp/cmsis/public/zdsp_backend.h b/subsys/dsp/cmsis/public/zdsp_backend.h
new file mode 100644
index 0000000000..39a9d6d4ab
--- /dev/null
+++ b/subsys/dsp/cmsis/public/zdsp_backend.h
@@ -0,0 +1,280 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_
+#define SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This include MUST be done before arm_math.h so we can let the arch specific
+ * logic set up the right #define values for arm_math.h
+ */
+#include <zephyr/kernel.h>
+
+#include <arm_math.h>
+
+static inline void zdsp_mult_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst,
+				uint32_t block_size)
+{
+	arm_mult_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_mult_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_add_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size)
+{
+	arm_add_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				uint32_t block_size)
+{
+	arm_add_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				uint32_t block_size)
+{
+	arm_add_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_add_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				uint32_t block_size)
+{
+	arm_add_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_sub_q7(const q7_t *src_a, const q7_t *src_b, q7_t *dst, uint32_t block_size)
+{
+	arm_sub_q7(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_q15(const q15_t *src_a, const q15_t *src_b, q15_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_q15(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_q31(const q31_t *src_a, const q31_t *src_b, q31_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_q31(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_sub_f32(const float32_t *src_a, const float32_t *src_b, float32_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_f32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_scale_q7(const q7_t *src, q7_t scale_fract, int8_t shift, q7_t *dst,
+				 uint32_t block_size)
+{
+	arm_scale_q7(src, scale_fract, shift, dst, block_size);
+}
+static inline void zdsp_scale_q15(const q15_t *src, q15_t scale_fract, int8_t shift, q15_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_q15(src, scale_fract, shift, dst, block_size);
+}
+static inline void zdsp_scale_q31(const q31_t *src, q31_t scale_fract, int8_t shift, q31_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_q31(src, scale_fract, shift, dst, block_size);
+}
+
+static inline void zdsp_scale_f32(const float32_t *src, float32_t scale, float32_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_f32(src, scale, dst, block_size);
+}
+
+static inline void zdsp_abs_q7(const q7_t *src, q7_t *dst, uint32_t block_size)
+{
+	arm_abs_q7(src, dst, block_size);
+}
+static inline void zdsp_abs_q15(const q15_t *src, q15_t *dst, uint32_t block_size)
+{
+	arm_abs_q15(src, dst, block_size);
+}
+static inline void zdsp_abs_q31(const q31_t *src, q31_t *dst, uint32_t block_size)
+{
+	arm_abs_q31(src, dst, block_size);
+}
+static inline void zdsp_abs_f32(const float32_t *src, float32_t *dst, uint32_t block_size)
+{
+	arm_abs_f32(src, dst, block_size);
+}
+
+static inline void zdsp_negate_q7(const q7_t *src, q7_t *dst, uint32_t block_size)
+{
+	arm_negate_q7(src, dst, block_size);
+}
+static inline void zdsp_negate_q15(const q15_t *src, q15_t *dst, uint32_t block_size)
+{
+	arm_negate_q15(src, dst, block_size);
+}
+static inline void zdsp_negate_q31(const q31_t *src, q31_t *dst, uint32_t block_size)
+{
+	arm_negate_q31(src, dst, block_size);
+}
+static inline void zdsp_negate_f32(const float32_t *src, float32_t *dst, uint32_t block_size)
+{
+	arm_negate_f32(src, dst, block_size);
+}
+
+static inline void zdsp_dot_prod_q7(const q7_t *src_a, const q7_t *src_b, uint32_t block_size,
+				    q31_t *dst)
+{
+	arm_dot_prod_q7(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_q15(const q15_t *src_a, const q15_t *src_b, uint32_t block_size,
+				     q63_t *dst)
+{
+	arm_dot_prod_q15(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_q31(const q31_t *src_a, const q31_t *src_b, uint32_t block_size,
+				     q63_t *dst)
+{
+	arm_dot_prod_q31(src_a, src_b, block_size, dst);
+}
+static inline void zdsp_dot_prod_f32(const float32_t *src_a, const float32_t *src_b,
+				     uint32_t block_size, float32_t *dst)
+{
+	arm_dot_prod_f32(src_a, src_b, block_size, dst);
+}
+
+static inline void zdsp_shift_q7(const q7_t *src, int8_t shift_bits, q7_t *dst, uint32_t block_size)
+{
+	arm_shift_q7(src, shift_bits, dst, block_size);
+}
+static inline void zdsp_shift_q15(const q15_t *src, int8_t shift_bits, q15_t *dst,
+				  uint32_t block_size)
+{
+	arm_shift_q15(src, shift_bits, dst, block_size);
+}
+static inline void zdsp_shift_q31(const q31_t *src, int8_t shift_bits, q31_t *dst,
+				  uint32_t block_size)
+{
+	arm_shift_q31(src, shift_bits, dst, block_size);
+}
+
+static inline void zdsp_offset_q7(const q7_t *src, q7_t offset, q7_t *dst, uint32_t block_size)
+{
+	arm_offset_q7(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_q15(const q15_t *src, q15_t offset, q15_t *dst, uint32_t block_size)
+{
+	arm_offset_q15(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_q31(const q31_t *src, q31_t offset, q31_t *dst, uint32_t block_size)
+{
+	arm_offset_q31(src, offset, dst, block_size);
+}
+static inline void zdsp_offset_f32(const float32_t *src, float32_t offset, float32_t *dst,
+				   uint32_t block_size)
+{
+	arm_offset_f32(src, offset, dst, block_size);
+}
+
+static inline void zdsp_clip_q7(const q7_t *src, q7_t *dst, q7_t low, q7_t high,
+				uint32_t num_samples)
+{
+	arm_clip_q7(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_q15(const q15_t *src, q15_t *dst, q15_t low, q15_t high,
+				 uint32_t num_samples)
+{
+	arm_clip_q15(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_q31(const q31_t *src, q31_t *dst, q31_t low, q31_t high,
+				 uint32_t num_samples)
+{
+	arm_clip_q31(src, dst, low, high, num_samples);
+}
+static inline void zdsp_clip_f32(const float32_t *src, float32_t *dst, float32_t low,
+				 float32_t high, uint32_t num_samples)
+{
+	arm_clip_f32(src, dst, low, high, num_samples);
+}
+
+static inline void zdsp_and_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size)
+{
+	arm_and_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_and_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size)
+{
+	arm_and_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_and_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size)
+{
+	arm_and_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_or_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			      uint32_t block_size)
+{
+	arm_or_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_or_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+			       uint32_t block_size)
+{
+	arm_or_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_or_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+			       uint32_t block_size)
+{
+	arm_or_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_xor_u8(const uint8_t *src_a, const uint8_t *src_b, uint8_t *dst,
+			       uint32_t block_size)
+{
+	arm_xor_u8(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_xor_u16(const uint16_t *src_a, const uint16_t *src_b, uint16_t *dst,
+				uint32_t block_size)
+{
+	arm_xor_u16(src_a, src_b, dst, block_size);
+}
+static inline void zdsp_xor_u32(const uint32_t *src_a, const uint32_t *src_b, uint32_t *dst,
+				uint32_t block_size)
+{
+	arm_xor_u32(src_a, src_b, dst, block_size);
+}
+
+static inline void zdsp_not_u8(const uint8_t *src, uint8_t *dst, uint32_t block_size)
+{
+	arm_not_u8(src, dst, block_size);
+}
+static inline void zdsp_not_u16(const uint16_t *src, uint16_t *dst, uint32_t block_size)
+{
+	arm_not_u16(src, dst, block_size);
+}
+static inline void zdsp_not_u32(const uint32_t *src, uint32_t *dst, uint32_t block_size)
+{
+	arm_not_u32(src, dst, block_size);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef CONFIG_FP16
+#include "zdsp_backend_f16.h"
+#endif /* CONFIG_FP16 */
+
+#endif /* SUBSYS_MATH_CMSIS_BACKEND_PUBLIC_ZDSP_BACKEND_DSP_H_ */
diff --git a/subsys/dsp/cmsis/public/zdsp_backend_f16.h b/subsys/dsp/cmsis/public/zdsp_backend_f16.h
new file mode 100644
index 0000000000..1bc6364edd
--- /dev/null
+++ b/subsys/dsp/cmsis/public/zdsp_backend_f16.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_
+#define SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_
+
+/* Includes stay outside extern "C". kernel.h MUST come before arm_math_f16.h so the
+ * arch specific logic can set up the right #define values for it.
+ */
+#include <zephyr/kernel.h>
+
+#include <arm_math_f16.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void zdsp_mult_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				 uint32_t block_size)
+{
+	arm_mult_f16(src_a, src_b, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_add_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				uint32_t block_size)
+{
+	arm_add_f16(src_a, src_b, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_sub_f16(const float16_t *src_a, const float16_t *src_b, float16_t *dst,
+				uint32_t block_size)
+{
+	arm_sub_f16(src_a, src_b, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_scale_f16(const float16_t *src, float16_t scale, float16_t *dst,
+				  uint32_t block_size)
+{
+	arm_scale_f16(src, scale, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_abs_f16(const float16_t *src, float16_t *dst, uint32_t block_size)
+{
+	arm_abs_f16(src, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_dot_prod_f16(const float16_t *src_a, const float16_t *src_b,
+				     uint32_t block_size, float16_t *result)
+{
+	arm_dot_prod_f16(src_a, src_b, block_size, result); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_offset_f16(const float16_t *src, float16_t offset, float16_t *dst,
+				   uint32_t block_size)
+{
+	arm_offset_f16(src, offset, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_negate_f16(const float16_t *src, float16_t *dst, uint32_t block_size)
+{
+	arm_negate_f16(src, dst, block_size); /* forwards all arguments unchanged */
+}
+
+static inline void zdsp_clip_f16(const float16_t *src, float16_t *dst, float16_t low,
+				 float16_t high, uint32_t num_samples)
+{
+	arm_clip_f16(src, dst, low, high, num_samples); /* forwards all arguments unchanged */
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SUBSYS_DSP_CMSIS_PUBLIC_ZDSP_BACKEND_F16_H_ */
diff --git a/west.yml b/west.yml
index 3473a1b5de..cc3f462b77 100644
--- a/west.yml
+++ b/west.yml
@@ -32,7 +32,7 @@ manifest:
       revision: fe0ab36e0fa7453a4c9b97bedac89709f45cf965
       path: modules/lib/chre
     - name: cmsis
-      revision: 093de61c2a7d12dc9253daf8692f61f793a9254a
+      revision: 74981bf893e8b10931464b9945e2143d99a3f0a3
       path: modules/hal/cmsis
       groups:
         - hal