diff --git a/.github/workflows/run-tests-rvv.yml b/.github/workflows/run-tests-rvv.yml new file mode 100644 index 00000000..e97825f3 --- /dev/null +++ b/.github/workflows/run-tests-rvv.yml @@ -0,0 +1,55 @@ +# +# Copyright 2020 - 2022 Free Software Foundation, Inc. +# +# This file is part of VOLK +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +name: Run VOLK tests on different RVV configurations + +on: [push, pull_request] + +jobs: + Tests: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Install packages + run: | + sudo apt-get update -q -y + sudo apt-get install -y python3-mako cmake qemu-user-static g++-14-riscv64-linux-gnu clang-18 + mkdir build + cd build + - name: Test gcc-14 VLEN=128 + run: | + cd build; rm -rf * + CXX=riscv64-linux-gnu-g++-14 CC=riscv64-linux-gnu-gcc-14 VLEN=128 \ + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. + make -j$(nproc) + ARGS=-V make test + - name: Test gcc-14 VLEN=256 + run: | + cd build; rm -rf * + CXX=riscv64-linux-gnu-g++-14 CC=riscv64-linux-gnu-gcc-14 VLEN=256 \ + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. -DCMAKE_BUILD_TYPE=Release + make -j$(nproc) + ARGS=-V make test + - name: Test clang-18 VLEN=512 + run: | + cd build; rm -rf * + CXX=clang++-18 CC=clang-18 CFLAGS=--target=riscv64-linux-gnu VLEN=512 \ + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. + make -j$(nproc) + ARGS=-V make test + - name: Test clang-18 VLEN=1024 + run: | + cd build; rm -rf * + CXX=clang++-18 CC=clang-18 CFLAGS=--target=riscv64-linux-gnu VLEN=1024 \ + cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchains/rv64gcv-linux-gnu.cmake .. -DCMAKE_BUILD_TYPE=Release + make -j$(nproc) + ARGS=-V make test + + diff --git a/cmake/Checks/check-rvv-intrinsics.c b/cmake/Checks/check-rvv-intrinsics.c new file mode 100644 index 00000000..48d874de --- /dev/null +++ b/cmake/Checks/check-rvv-intrinsics.c @@ -0,0 +1,5 @@ +#if (__riscv_v_intrinsic >= 1000000 || __clang_major__ >= 18 || __GNUC__ >= 14) +int main() { return 0; } +#else +#error "rvv intrinsics aren't supported" +#endif diff --git a/cmake/Toolchains/rv64gcv-linux-gnu.cmake b/cmake/Toolchains/rv64gcv-linux-gnu.cmake new file mode 100644 index 00000000..f6edd741 --- /dev/null +++ b/cmake/Toolchains/rv64gcv-linux-gnu.cmake @@ -0,0 +1,34 @@ +# +# Copyright 2024 Free Software Foundation, Inc. 
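+#
+# Cross-compilation toolchain for rv64gcv-linux-gnu: the compilers are taken
+# from the CC/CXX environment variables, and cross-compiled test binaries run
+# through qemu-riscv64-static with the vector length chosen by the VLEN
+# environment variable (QEMU_VLEN below, default 128).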
+# +# This file is part of VOLK +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(CMAKE_C_COMPILER $ENV{CC}) +set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +set(CMAKE_CXX_COMPILER $ENV{CXX}) + +set(CMAKE_C_FLAGS "$ENV{CFLAGS} -march=rv64gcv" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "" FORCE) +set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -g" CACHE STRING "" FORCE) + +set(CMAKE_OBJCOPY + ${RISCV64_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}objcopy + CACHE INTERNAL "objcopy tool") +set(CMAKE_SIZE_UTIL + ${RISCV64_TOOLCHAIN_DIR}/${TOOLCHAIN_PREFIX}size + CACHE INTERNAL "size tool") + +set(CMAKE_FIND_ROOT_PATH ${BINUTILS_PATH}) + +set(QEMU_VLEN $ENV{VLEN}) +if(NOT QEMU_VLEN) + set(QEMU_VLEN "128") +endif() + +set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-riscv64-static -L /usr/riscv64-linux-gnu/ -cpu rv64,zba=true,zbb=true,v=on,vlen=${QEMU_VLEN},rvv_ta_all_1s=on,rvv_ma_all_1s=on") diff --git a/gen/archs.xml b/gen/archs.xml index 164c7bb4..7f971369 100644 --- a/gen/archs.xml +++ b/gen/archs.xml @@ -181,4 +181,48 @@ at the top, as a last resort. + + tmpl/ currently assumes that every arch.name starting with "rv" requires + RVV intrinsics + + + There is currently no mechanism in RISC-V to append extensions, + so each arch needs to specify all of them, and the order needs in the + machine definition needs to be from the fewest to the most extensions. + Fortunately, this maps quite well to the profiles concept. + + + + -march=rv64gcv + -march=rv64gcv + + + + + -march=rv64gcv + -march=rv64gcv + + It's unclear how performance portable segmented load/stores are, so the + default rvv implementations avoid using them. + This is a pseudo arch for separate segmented load/store implementations, + and is expected to never be used standalone without "rvv". + + + + + google/cpu_features currently doesn't support these extensions and profiles. + + + + + diff --git a/gen/machines.xml b/gen/machines.xml index 887f9794..64e1bbd8 100644 --- a/gen/machines.xml +++ b/gen/machines.xml @@ -33,6 +33,18 @@ generic riscv64 orc| + +generic riscv64 rvv rvvseg orc| + + + + + + generic 32|64| mmx| sse sse2 sse3 sse4_a popcount orc| diff --git a/include/volk/volk_rvv_intrinsics.h b/include/volk/volk_rvv_intrinsics.h new file mode 100644 index 00000000..85e21d43 --- /dev/null +++ b/include/volk/volk_rvv_intrinsics.h @@ -0,0 +1,77 @@ +/* -*- c++ -*- */ +/* + * Copyright 2024 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: LGPL-3.0-or-later + */ + +/* + * This file is intended to hold RVV intrinsics of intrinsics. + * They should be used in VOLK kernels to avoid copy-paste. 
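+ *
+ * The RISCV_SHRINK{2,4,8} macros fold an LMUL=2/4/8 register group down to a
+ * single LMUL=1 register by applying a binary op (e.g. vfadd) pairwise, so a
+ * kernel can finish a horizontal reduction with one __riscv_vfredusum or
+ * __riscv_vredsum on the m1 result. Illustrative use, as in the dot-product
+ * kernels: vfloat32m1_t r = RISCV_SHRINK4(vfadd, f, 32, acc_m4);
+ * RISCV_PERM{4,8} apply a per-m1-part vrgather permutation across an m4/m8
+ * byte vector, RISCV_LUT4 does the same with a fixed m1 lookup table, and
+ * RISCV_VMFLTZ builds a "negative" mask by reinterpreting elements as
+ * signed integers and comparing against zero.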
+ */ + +#ifndef INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ +#define INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ +#include + +#define RISCV_SHRINK2(op, T, S, v) \ + __riscv_##op(__riscv_vget_##T##S##m1(v, 0), \ + __riscv_vget_##T##S##m1(v, 1), \ + __riscv_vsetvlmax_e##S##m1()) + +#define RISCV_SHRINK4(op, T, S, v) \ + __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \ + __riscv_vget_##T##S##m1(v, 1), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \ + __riscv_vget_##T##S##m1(v, 3), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_vsetvlmax_e##S##m1()) + +#define RISCV_SHRINK8(op, T, S, v) \ + __riscv_##op(__riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \ + __riscv_vget_##T##S##m1(v, 1), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \ + __riscv_vget_##T##S##m1(v, 3), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 4), \ + __riscv_vget_##T##S##m1(v, 5), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_##op(__riscv_vget_##T##S##m1(v, 6), \ + __riscv_vget_##T##S##m1(v, 7), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_vsetvlmax_e##S##m1()), \ + __riscv_vsetvlmax_e##S##m1()) + +#define RISCV_PERM4(f, v, vidx) \ + __riscv_vcreate_v_u8m1_u8m4( \ + f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1())) + +#define RISCV_LUT4(f, vtbl, v) \ + __riscv_vcreate_v_u8m1_u8m4( \ + f(vtbl, __riscv_vget_u8m1(v, 0), __riscv_vsetvlmax_e8m1()), \ + f(vtbl, __riscv_vget_u8m1(v, 1), __riscv_vsetvlmax_e8m1()), \ + f(vtbl, __riscv_vget_u8m1(v, 2), __riscv_vsetvlmax_e8m1()), \ + f(vtbl, __riscv_vget_u8m1(v, 3), __riscv_vsetvlmax_e8m1())) + +#define RISCV_PERM8(f, v, vidx) \ + __riscv_vcreate_v_u8m1_u8m8( \ + f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 4), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 5), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 6), vidx, __riscv_vsetvlmax_e8m1()), \ + f(__riscv_vget_u8m1(v, 7), vidx, __riscv_vsetvlmax_e8m1())) + +#define RISCV_VMFLTZ(T, v, vl) __riscv_vmslt(__riscv_vreinterpret_i##T(v), 0, vl) + +#endif /* INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ */ diff --git a/kernels/volk/volk_16i_32fc_dot_prod_32fc.h b/kernels/volk/volk_16i_32fc_dot_prod_32fc.h index 8949785f..8d772ba8 100644 --- a/kernels/volk/volk_16i_32fc_dot_prod_32fc.h +++ b/kernels/volk/volk_16i_32fc_dot_prod_32fc.h @@ -668,5 +668,66 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result, #endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_16i_32fc_dot_prod_32fc_rvv(lv_32fc_t* result, + const short* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m4_t vsumr = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)taps, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = 
__riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = + __riscv_vfwcvt_f(__riscv_vle16_v_i16m2((const int16_t*)input, vl), vl); + vsumr = __riscv_vfmacc_tu(vsumr, vr, v, vl); + vsumi = __riscv_vfmacc_tu(vsumi, vi, v, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_16i_32fc_dot_prod_32fc_rvvseg(lv_32fc_t* result, + const short* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m4_t vsumr = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)taps, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0); + vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = + __riscv_vfwcvt_f(__riscv_vle16_v_i16m2((const int16_t*)input, vl), vl); + vsumr = __riscv_vfmacc_tu(vsumr, vr, v, vl); + vsumi = __riscv_vfmacc_tu(vsumi, vi, v, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVVSEG*/ #endif /*INCLUDED_volk_16i_32fc_dot_prod_32fc_H*/ diff --git a/kernels/volk/volk_16i_branch_4_state_8.h b/kernels/volk/volk_16i_branch_4_state_8.h index b0f4d3b6..775b1523 100644 --- a/kernels/volk/volk_16i_branch_4_state_8.h +++ b/kernels/volk/volk_16i_branch_4_state_8.h @@ -10,6 +10,10 @@ /*! * \page volk_16i_branch_4_state_8 * + * \b Deprecation + * + * This kernel is deprecated. + * * \b Overview * * diff --git a/kernels/volk/volk_16i_convert_8i.h b/kernels/volk/volk_16i_convert_8i.h index cb7168ef..648712af 100644 --- a/kernels/volk/volk_16i_convert_8i.h +++ b/kernels/volk/volk_16i_convert_8i.h @@ -275,5 +275,20 @@ static inline void volk_16i_convert_8i_neon(int8_t* outputVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16i_convert_8i_rvv(int8_t* outputVector, + const int16_t* inputVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e16m8(n); + vint16m8_t v = __riscv_vle16_v_i16m8(inputVector, vl); + __riscv_vse8(outputVector, __riscv_vnsra(v, 8, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_16i_convert_8i_a_H */ diff --git a/kernels/volk/volk_16i_max_star_16i.h b/kernels/volk/volk_16i_max_star_16i.h index fba73da1..ab0a4bcf 100644 --- a/kernels/volk/volk_16i_max_star_16i.h +++ b/kernels/volk/volk_16i_max_star_16i.h @@ -10,6 +10,10 @@ /*! * \page volk_16i_max_star_16i * + * \b Deprecation + * + * This kernel is deprecated. + * * \b Overview * * diff --git a/kernels/volk/volk_16i_max_star_horizontal_16i.h b/kernels/volk/volk_16i_max_star_horizontal_16i.h index 2b0b65c3..ee08ba43 100644 --- a/kernels/volk/volk_16i_max_star_horizontal_16i.h +++ b/kernels/volk/volk_16i_max_star_horizontal_16i.h @@ -11,6 +11,10 @@ /*! 
* \page volk_16i_max_star_horizontal_16i * + * \b Deprecation + * + * This kernel is deprecated. + * * \b Overview * * diff --git a/kernels/volk/volk_16i_permute_and_scalar_add.h b/kernels/volk/volk_16i_permute_and_scalar_add.h index 077c37b0..f57603db 100644 --- a/kernels/volk/volk_16i_permute_and_scalar_add.h +++ b/kernels/volk/volk_16i_permute_and_scalar_add.h @@ -10,6 +10,10 @@ /*! * \page volk_16i_permute_and_scalar_add * + * \b Deprecation + * + * This kernel is deprecated. + * * \b Overview * * diff --git a/kernels/volk/volk_16i_s32f_convert_32f.h b/kernels/volk/volk_16i_s32f_convert_32f.h index 817ecd22..1f9660ce 100644 --- a/kernels/volk/volk_16i_s32f_convert_32f.h +++ b/kernels/volk/volk_16i_s32f_convert_32f.h @@ -483,4 +483,21 @@ static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16i_s32f_convert_32f_rvv(float* outputVector, + const int16_t* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e16m4(n); + vfloat32m8_t v = __riscv_vfwcvt_f(__riscv_vle16_v_i16m4(inputVector, vl), vl); + __riscv_vse32(outputVector, __riscv_vfmul(v, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */ diff --git a/kernels/volk/volk_16i_x4_quad_max_star_16i.h b/kernels/volk/volk_16i_x4_quad_max_star_16i.h index a8337cc3..94e264fe 100644 --- a/kernels/volk/volk_16i_x4_quad_max_star_16i.h +++ b/kernels/volk/volk_16i_x4_quad_max_star_16i.h @@ -10,6 +10,10 @@ /*! * \page volk_16i_x4_quad_max_star_16i * + * \b Deprecation + * + * This kernel is deprecated. + * * \b Overview * * diff --git a/kernels/volk/volk_16i_x5_add_quad_16i_x4.h b/kernels/volk/volk_16i_x5_add_quad_16i_x4.h index 53fa8de5..ba14c59d 100644 --- a/kernels/volk/volk_16i_x5_add_quad_16i_x4.h +++ b/kernels/volk/volk_16i_x5_add_quad_16i_x4.h @@ -10,6 +10,10 @@ /*! * \page volk_16i_x5_add_quad_16i_x4 * + * \b Deprecation + * + * This kernel is deprecated. 
+ * * \b Overview * * diff --git a/kernels/volk/volk_16ic_convert_32fc.h b/kernels/volk/volk_16ic_convert_32fc.h index 7a779bf8..99fe7cb2 100644 --- a/kernels/volk/volk_16ic_convert_32fc.h +++ b/kernels/volk/volk_16ic_convert_32fc.h @@ -315,4 +315,23 @@ static inline void volk_16ic_convert_32fc_u_avx(lv_32fc_t* outputVector, } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_convert_32fc_rvv(lv_32fc_t* outputVector, + const lv_16sc_t* inputVector, + unsigned int num_points) +{ + const int16_t* in = (const int16_t*)inputVector; + float* out = (float*)outputVector; + size_t n = num_points * 2; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4_t v = __riscv_vle16_v_i16m4(in, vl); + __riscv_vse32(out, __riscv_vfwcvt_f(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_convert_16ic_u_H */ diff --git a/kernels/volk/volk_16ic_deinterleave_16i_x2.h b/kernels/volk/volk_16ic_deinterleave_16i_x2.h index 37fb41e1..9f4ad7f7 100644 --- a/kernels/volk/volk_16ic_deinterleave_16i_x2.h +++ b/kernels/volk/volk_16ic_deinterleave_16i_x2.h @@ -375,4 +375,45 @@ static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_deinterleave_16i_x2_rvv(int16_t* iBuffer, + int16_t* qBuffer, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vuint32m8_t vc = __riscv_vle32_v_u32m8((const uint32_t*)complexVector, vl); + vuint16m4_t vr = __riscv_vnsrl(vc, 0, vl); + vuint16m4_t vi = __riscv_vnsrl(vc, 16, vl); + __riscv_vse16((uint16_t*)iBuffer, vr, vl); + __riscv_vse16((uint16_t*)qBuffer, vi, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_16ic_deinterleave_16i_x2_rvvseg(int16_t* iBuffer, + int16_t* qBuffer, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vuint16m4x2_t vc = + __riscv_vlseg2e16_v_u16m4x2((const uint16_t*)complexVector, vl); + vuint16m4_t vr = __riscv_vget_u16m4(vc, 0); + vuint16m4_t vi = __riscv_vget_u16m4(vc, 1); + __riscv_vse16((uint16_t*)iBuffer, vr, vl); + __riscv_vse16((uint16_t*)qBuffer, vi, vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_u_H */ diff --git a/kernels/volk/volk_16ic_deinterleave_real_16i.h b/kernels/volk/volk_16ic_deinterleave_real_16i.h index 92110a3a..f5a9696f 100644 --- a/kernels/volk/volk_16ic_deinterleave_real_16i.h +++ b/kernels/volk/volk_16ic_deinterleave_real_16i.h @@ -377,4 +377,21 @@ static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_deinterleave_real_16i_rvv(int16_t* iBuffer, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + const uint32_t* in = (const uint32_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint32m8_t vc = __riscv_vle32_v_u32m8(in, vl); + __riscv_vse16((uint16_t*)iBuffer, __riscv_vnsrl(vc, 0, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_16ic_deinterleave_real_16i_u_H */ diff --git 
a/kernels/volk/volk_16ic_deinterleave_real_8i.h b/kernels/volk/volk_16ic_deinterleave_real_8i.h index 231be417..257ea519 100644 --- a/kernels/volk/volk_16ic_deinterleave_real_8i.h +++ b/kernels/volk/volk_16ic_deinterleave_real_8i.h @@ -415,4 +415,24 @@ static inline void volk_16ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer, } } #endif /* LV_HAVE_AVX2 */ + + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_deinterleave_real_8i_rvv(int8_t* iBuffer, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + const uint32_t* in = (const uint32_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint32m8_t vc = __riscv_vle32_v_u32m8(in, vl); + __riscv_vse8( + (uint8_t*)iBuffer, __riscv_vnsrl(__riscv_vnsrl(vc, 0, vl), 8, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_16ic_deinterleave_real_8i_u_H */ diff --git a/kernels/volk/volk_16ic_magnitude_16i.h b/kernels/volk/volk_16ic_magnitude_16i.h index 76472540..79553d65 100644 --- a/kernels/volk/volk_16ic_magnitude_16i.h +++ b/kernels/volk/volk_16ic_magnitude_16i.h @@ -411,4 +411,50 @@ static inline void volk_16ic_magnitude_16i_neonv7(int16_t* magnitudeVector, } #endif /* LV_HAVE_NEONV7 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_magnitude_16i_rvv(int16_t* magnitudeVector, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + const float scale = SHRT_MAX, iscale = 1.0f / scale; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint32m8_t vc = __riscv_vle32_v_i32m8((const int32_t*)complexVector, vl); + vint16m4_t vr = __riscv_vnsra(vc, 0, vl); + vint16m4_t vi = __riscv_vnsra(vc, 16, vl); + vfloat32m8_t vrf = __riscv_vfmul(__riscv_vfwcvt_f(vr, vl), iscale, vl); + vfloat32m8_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), iscale, vl); + vfloat32m8_t vf = __riscv_vfmacc(__riscv_vfmul(vif, vif, vl), vrf, vrf, vl); + vf = __riscv_vfmul(__riscv_vfsqrt(vf, vl), scale, vl); + __riscv_vse16(magnitudeVector, __riscv_vfncvt_x(vf, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_16ic_magnitude_16i_rvvseg(int16_t* magnitudeVector, + const lv_16sc_t* complexVector, + unsigned int num_points) +{ + const float scale = SHRT_MAX, iscale = 1.0f / scale; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4x2_t vc = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)complexVector, vl); + vint16m4_t vr = __riscv_vget_i16m4(vc, 0); + vint16m4_t vi = __riscv_vget_i16m4(vc, 1); + vfloat32m8_t vrf = __riscv_vfmul(__riscv_vfwcvt_f(vr, vl), iscale, vl); + vfloat32m8_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), iscale, vl); + vfloat32m8_t vf = __riscv_vfmacc(__riscv_vfmul(vif, vif, vl), vrf, vrf, vl); + vf = __riscv_vfmul(__riscv_vfsqrt(vf, vl), scale, vl); + __riscv_vse16(magnitudeVector, __riscv_vfncvt_x(vf, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_16ic_magnitude_16i_u_H */ diff --git a/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h b/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h index 219e977c..7f9b8ad6 100644 --- a/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h +++ b/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h @@ -327,4 +327,51 @@ volk_16ic_s32f_deinterleave_32f_x2_u_avx2(float* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include 
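+// The plain RVV variant below loads each complex sample as one 32-bit element
+// and splits I/Q with narrowing shifts (vnsra by 0 and by 16); the rvvseg
+// variant further down uses a segmented load (vlseg2e16) instead. Both widen
+// to float and scale the results by 1/scalar.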
+ +static inline void volk_16ic_s32f_deinterleave_32f_x2_rvv(float* iBuffer, + float* qBuffer, + const lv_16sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint32m8_t vc = __riscv_vle32_v_i32m8((const int32_t*)complexVector, vl); + vint16m4_t vr = __riscv_vnsra(vc, 0, vl); + vint16m4_t vi = __riscv_vnsra(vc, 16, vl); + vfloat32m8_t vrf = __riscv_vfwcvt_f(vr, vl); + vfloat32m8_t vif = __riscv_vfwcvt_f(vi, vl); + __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl); + __riscv_vse32(qBuffer, __riscv_vfmul(vif, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void +volk_16ic_s32f_deinterleave_32f_x2_rvvseg(float* iBuffer, + float* qBuffer, + const lv_16sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4x2_t vc = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)complexVector, vl); + vint16m4_t vr = __riscv_vget_i16m4(vc, 0); + vint16m4_t vi = __riscv_vget_i16m4(vc, 1); + vfloat32m8_t vrf = __riscv_vfwcvt_f(vr, vl); + vfloat32m8_t vif = __riscv_vfwcvt_f(vi, vl); + __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl); + __riscv_vse32(qBuffer, __riscv_vfmul(vif, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_u_H */ diff --git a/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h b/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h index 55688329..e8a0d1a0 100644 --- a/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h +++ b/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h @@ -334,4 +334,24 @@ volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_16ic_s32f_deinterleave_real_32f_rvv(float* iBuffer, + const lv_16sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + const int32_t* in = (const int32_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vint32m8_t vc = __riscv_vle32_v_i32m8(in, vl); + vfloat32m8_t vr = __riscv_vfwcvt_f(__riscv_vncvt_x(vc, vl), vl); + __riscv_vse32(iBuffer, __riscv_vfmul(vr, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */ diff --git a/kernels/volk/volk_16ic_s32f_magnitude_32f.h b/kernels/volk/volk_16ic_s32f_magnitude_32f.h index 89600632..8b193ee2 100644 --- a/kernels/volk/volk_16ic_s32f_magnitude_32f.h +++ b/kernels/volk/volk_16ic_s32f_magnitude_32f.h @@ -329,4 +329,48 @@ static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_s32f_magnitude_32f_rvv(float* magnitudeVector, + const lv_16sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint32m8_t vc = __riscv_vle32_v_i32m8((const int32_t*)complexVector, vl); + vint16m4_t vr = __riscv_vnsra(vc, 0, vl); + vint16m4_t vi = __riscv_vnsra(vc, 16, vl); + vfloat32m8_t vrf = 
__riscv_vfmul(__riscv_vfwcvt_f(vr, vl), 1.0f / scalar, vl); + vfloat32m8_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), 1.0f / scalar, vl); + vfloat32m8_t vf = __riscv_vfmacc(__riscv_vfmul(vif, vif, vl), vrf, vrf, vl); + __riscv_vse32(magnitudeVector, __riscv_vfsqrt(vf, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_16ic_s32f_magnitude_32f_rvvseg(float* magnitudeVector, + const lv_16sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4x2_t vc = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)complexVector, vl); + vint16m4_t vr = __riscv_vget_i16m4(vc, 0); + vint16m4_t vi = __riscv_vget_i16m4(vc, 1); + vfloat32m8_t vrf = __riscv_vfmul(__riscv_vfwcvt_f(vr, vl), 1.0f / scalar, vl); + vfloat32m8_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), 1.0f / scalar, vl); + vfloat32m8_t vf = __riscv_vfmacc(__riscv_vfmul(vif, vif, vl), vrf, vrf, vl); + __riscv_vse32(magnitudeVector, __riscv_vfsqrt(vf, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */ diff --git a/kernels/volk/volk_16ic_x2_dot_prod_16ic.h b/kernels/volk/volk_16ic_x2_dot_prod_16ic.h index 48e33abf..a12350a0 100644 --- a/kernels/volk/volk_16ic_x2_dot_prod_16ic.h +++ b/kernels/volk/volk_16ic_x2_dot_prod_16ic.h @@ -690,4 +690,68 @@ static inline void volk_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out, #endif /* LV_HAVE_NEON */ + +#ifdef LV_HAVE_RVV +#include "volk_32fc_x2_dot_prod_32fc.h" + +static inline void volk_16ic_x2_dot_prod_16ic_rvv(lv_16sc_t* result, + const lv_16sc_t* in_a, + const lv_16sc_t* in_b, + unsigned int num_points) +{ + vint16m4_t vsumr = __riscv_vmv_v_x_i16m4(0, __riscv_vsetvlmax_e16m4()); + vint16m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in_a += vl, in_b += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint32m8_t va = __riscv_vle32_v_i32m8((const int32_t*)in_a, vl); + vint32m8_t vb = __riscv_vle32_v_i32m8((const int32_t*)in_b, vl); + vint16m4_t var = __riscv_vnsra(va, 0, vl), vai = __riscv_vnsra(va, 16, vl); + vint16m4_t vbr = __riscv_vnsra(vb, 0, vl), vbi = __riscv_vnsra(vb, 16, vl); + vint16m4_t vr = __riscv_vnmsac(__riscv_vmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = __riscv_vmacc(__riscv_vmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vadd_tu(vsumi, vsumi, vi, vl); + } + size_t vl = __riscv_vsetvlmax_e16m1(); + vint16m1_t vr = RISCV_SHRINK4(vadd, i, 16, vsumr); + vint16m1_t vi = RISCV_SHRINK4(vadd, i, 16, vsumi); + vint16m1_t z = __riscv_vmv_s_x_i16m1(0, vl); + *result = lv_cmake(__riscv_vmv_x(__riscv_vredsum(vr, z, vl)), + __riscv_vmv_x(__riscv_vredsum(vi, z, vl))); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include "volk_32fc_x2_dot_prod_32fc.h" + + +static inline void volk_16ic_x2_dot_prod_16ic_rvvseg(lv_16sc_t* result, + const lv_16sc_t* in_a, + const lv_16sc_t* in_b, + unsigned int num_points) +{ + vint16m4_t vsumr = __riscv_vmv_v_x_i16m4(0, __riscv_vsetvlmax_e16m4()); + vint16m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in_a += vl, in_b += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4x2_t va = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)in_a, vl); + vint16m4x2_t vb = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)in_b, vl); + vint16m4_t var = __riscv_vget_i16m4(va, 0), vai = __riscv_vget_i16m4(va, 1); + 
vint16m4_t vbr = __riscv_vget_i16m4(vb, 0), vbi = __riscv_vget_i16m4(vb, 1); + vint16m4_t vr = __riscv_vnmsac(__riscv_vmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = __riscv_vmacc(__riscv_vmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vadd_tu(vsumi, vsumi, vi, vl); + } + size_t vl = __riscv_vsetvlmax_e16m1(); + vint16m1_t vr = RISCV_SHRINK4(vadd, i, 16, vsumr); + vint16m1_t vi = RISCV_SHRINK4(vadd, i, 16, vsumi); + vint16m1_t z = __riscv_vmv_s_x_i16m1(0, vl); + *result = lv_cmake(__riscv_vmv_x(__riscv_vredsum(vr, z, vl)), + __riscv_vmv_x(__riscv_vredsum(vi, z, vl))); +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_16ic_x2_dot_prod_16ic_H*/ diff --git a/kernels/volk/volk_16ic_x2_multiply_16ic.h b/kernels/volk/volk_16ic_x2_multiply_16ic.h index 03ee145c..37f0fb66 100644 --- a/kernels/volk/volk_16ic_x2_multiply_16ic.h +++ b/kernels/volk/volk_16ic_x2_multiply_16ic.h @@ -462,4 +462,52 @@ static inline void volk_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_16ic_x2_multiply_16ic_rvv(lv_16sc_t* result, + const lv_16sc_t* in_a, + const lv_16sc_t* in_b, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in_a += vl, in_b += vl, result += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint32m8_t va = __riscv_vle32_v_i32m8((const int32_t*)in_a, vl); + vint32m8_t vb = __riscv_vle32_v_i32m8((const int32_t*)in_b, vl); + vint16m4_t var = __riscv_vnsra(va, 0, vl), vai = __riscv_vnsra(va, 16, vl); + vint16m4_t vbr = __riscv_vnsra(vb, 0, vl), vbi = __riscv_vnsra(vb, 16, vl); + vint16m4_t vr = __riscv_vnmsac(__riscv_vmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = __riscv_vmacc(__riscv_vmul(var, vbi, vl), vai, vbr, vl); + vuint16m4_t vru = __riscv_vreinterpret_u16m4(vr); + vuint16m4_t viu = __riscv_vreinterpret_u16m4(vi); + vuint32m8_t v = __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFF, viu, vl); + __riscv_vse32((uint32_t*)result, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_16ic_x2_multiply_16ic_rvvseg(lv_16sc_t* result, + const lv_16sc_t* in_a, + const lv_16sc_t* in_b, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in_a += vl, in_b += vl, result += vl) { + vl = __riscv_vsetvl_e16m4(n); + vint16m4x2_t va = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)in_a, vl); + vint16m4x2_t vb = __riscv_vlseg2e16_v_i16m4x2((const int16_t*)in_b, vl); + vint16m4_t var = __riscv_vget_i16m4(va, 0), vai = __riscv_vget_i16m4(va, 1); + vint16m4_t vbr = __riscv_vget_i16m4(vb, 0), vbi = __riscv_vget_i16m4(vb, 1); + vint16m4_t vr = __riscv_vnmsac(__riscv_vmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = __riscv_vmacc(__riscv_vmul(var, vbi, vl), vai, vbr, vl); + __riscv_vsseg2e16_v_i16m4x2( + (int16_t*)result, __riscv_vcreate_v_i16m4x2(vr, vi), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_16ic_x2_multiply_16ic_H*/ diff --git a/kernels/volk/volk_16u_byteswap.h b/kernels/volk/volk_16u_byteswap.h index 8b1b8c03..50e59906 100644 --- a/kernels/volk/volk_16u_byteswap.h +++ b/kernels/volk/volk_16u_byteswap.h @@ -280,5 +280,54 @@ static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int nu } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_16u_byteswap_rvv(uint16_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + size_t vlmax = __riscv_vsetvlmax_e8m1(); + 
if (vlmax <= 256) { + vuint8m1_t vidx = __riscv_vreinterpret_u8m1( + __riscv_vsub(__riscv_vreinterpret_u16m1(__riscv_vid_v_u8m1(vlmax)), + 0x100 - 0x1, + vlmax / 2)); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e16m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle16_v_u16m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgather, v, vidx); + __riscv_vse16(intsToSwap, __riscv_vreinterpret_u16m8(v), vl); + } + } else { + vuint16m2_t vidx = __riscv_vreinterpret_u16m2( + __riscv_vsub(__riscv_vreinterpret_u32m2(__riscv_vid_v_u16m2(vlmax)), + 0x10000 - 0x1, + vlmax / 2)); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e16m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle16_v_u16m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgatherei16, v, vidx); + __riscv_vse16(intsToSwap, __riscv_vreinterpret_u16m8(v), vl); + } + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA23 +#include + +static inline void volk_16u_byteswap_rva23(uint16_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e16m8(n); + vuint16m8_t v = __riscv_vle16_v_u16m8(intsToSwap, vl); + __riscv_vse16(intsToSwap, __riscv_vrev8(v, vl), vl); + } +} +#endif /* LV_HAVE_RVA23 */ #endif /* INCLUDED_volk_16u_byteswap_a_H */ diff --git a/kernels/volk/volk_16u_byteswappuppet_16u.h b/kernels/volk/volk_16u_byteswappuppet_16u.h index 16e75d91..f01129eb 100644 --- a/kernels/volk/volk_16u_byteswappuppet_16u.h +++ b/kernels/volk/volk_16u_byteswappuppet_16u.h @@ -102,4 +102,26 @@ static inline void volk_16u_byteswappuppet_16u_u_orc(uint16_t* output, } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +static inline void volk_16u_byteswappuppet_16u_rvv(uint16_t* output, + uint16_t* intsToSwap, + unsigned int num_points) +{ + + volk_16u_byteswap_rvv((uint16_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t)); +} +#endif + +#ifdef LV_HAVE_RVA23 +static inline void volk_16u_byteswappuppet_16u_rva23(uint16_t* output, + uint16_t* intsToSwap, + unsigned int num_points) +{ + + volk_16u_byteswap_rva23((uint16_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t)); +} +#endif + #endif diff --git a/kernels/volk/volk_32f_64f_add_64f.h b/kernels/volk/volk_32f_64f_add_64f.h index 06b56819..54d890e3 100644 --- a/kernels/volk/volk_32f_64f_add_64f.h +++ b/kernels/volk/volk_32f_64f_add_64f.h @@ -230,4 +230,22 @@ static inline void volk_32f_64f_add_64f_a_avx(double* cVector, #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_64f_add_64f_rvv(double* cVector, + const float* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vfwcvt_f(__riscv_vle32_v_f32m4(aVector, vl), vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfadd(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_64f_add_64f_u_H */ diff --git a/kernels/volk/volk_32f_64f_multiply_64f.h b/kernels/volk/volk_32f_64f_multiply_64f.h index 069cd73e..5ff81578 100644 --- a/kernels/volk/volk_32f_64f_multiply_64f.h +++ b/kernels/volk/volk_32f_64f_multiply_64f.h @@ -188,5 +188,22 @@ static inline void volk_32f_64f_multiply_64f_a_avx(double* cVector, #endif /* LV_HAVE_AVX */ 
+#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_64f_multiply_64f_rvv(double* cVector, + const float* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vfwcvt_f(__riscv_vle32_v_f32m4(aVector, vl), vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfmul(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_64f_multiply_64f_u_H */ diff --git a/kernels/volk/volk_32f_8u_polarbutterfly_32f.h b/kernels/volk/volk_32f_8u_polarbutterfly_32f.h index b3683a96..41e98a80 100644 --- a/kernels/volk/volk_32f_8u_polarbutterfly_32f.h +++ b/kernels/volk/volk_32f_8u_polarbutterfly_32f.h @@ -383,4 +383,174 @@ static inline void volk_32f_8u_polarbutterfly_32f_u_avx2(float* llrs, #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_8u_polarbutterfly_32f_rvv(float* llrs, + unsigned char* u, + const int frame_exp, + const int stage, + const int u_num, + const int row) +{ + const int frame_size = 0x01 << frame_exp; + if (row % 2) { // for odd rows just do the only necessary calculation and return. + const float* next_llrs = llrs + frame_size + row; + *(llrs + row) = llr_even(*(next_llrs - 1), *next_llrs, u[u_num - 1]); + return; + } + + const int max_stage_depth = calculate_max_stage_depth_for_row(frame_exp, row); + if (max_stage_depth < 3) { // vectorized version needs larger vectors. + volk_32f_8u_polarbutterfly_32f_generic(llrs, u, frame_exp, stage, u_num, row); + return; + } + + int loop_stage = max_stage_depth; + int stage_size = 0x01 << loop_stage; + + float* src_llr_ptr; + float* dst_llr_ptr; + + if (row) { // not necessary for ZERO row. == first bit to be decoded. + // first do bit combination for all stages + // effectively encode some decoded bits again. + unsigned char* u_target = u + frame_size; + unsigned char* u_temp = u + 2 * frame_size; + memcpy(u_temp, u + u_num - stage_size, sizeof(unsigned char) * stage_size); + + volk_8u_x2_encodeframepolar_8u_rvv(u_target, u_temp, stage_size); + + src_llr_ptr = llrs + (max_stage_depth + 1) * frame_size + row - stage_size; + dst_llr_ptr = llrs + max_stage_depth * frame_size + row; + + size_t n = stage_size; + for (size_t vl; n > 0; + n -= vl, u_target += vl, src_llr_ptr += vl * 2, dst_llr_ptr += vl) { + vl = __riscv_vsetvl_e32m1(n); + vint8mf4_t v = __riscv_vle8_v_i8mf4((int8_t*)u_target, vl); + vuint64m2_t llr = __riscv_vle64_v_u64m2((const uint64_t*)src_llr_ptr, vl); + vfloat32m1_t llr0 = __riscv_vreinterpret_f32m1(__riscv_vnsrl(llr, 0, vl)); + vfloat32m1_t llr1 = __riscv_vreinterpret_f32m1(__riscv_vnsrl(llr, 32, vl)); + llr0 = __riscv_vfneg_mu(__riscv_vmslt(v, 0, vl), llr0, llr0, vl); + llr0 = __riscv_vfadd(llr0, llr1, vl); + __riscv_vse32(dst_llr_ptr, llr0, vl); + } + + --loop_stage; + stage_size >>= 1; + } + + const int min_stage = stage > 2 ? 
stage : 2; + + while (min_stage < loop_stage) { + dst_llr_ptr = llrs + loop_stage * frame_size + row; + src_llr_ptr = dst_llr_ptr + frame_size; + + size_t n = stage_size; + for (size_t vl; n > 0; n -= vl, src_llr_ptr += vl * 2, dst_llr_ptr += vl) { + vl = __riscv_vsetvl_e32m1(n); + vuint64m2_t llr = __riscv_vle64_v_u64m2((const uint64_t*)src_llr_ptr, vl); + vfloat32m1_t llr0 = __riscv_vreinterpret_f32m1(__riscv_vnsrl(llr, 0, vl)); + vfloat32m1_t llr1 = __riscv_vreinterpret_f32m1(__riscv_vnsrl(llr, 32, vl)); + vfloat32m1_t v = + __riscv_vfmin(__riscv_vfabs(llr0, vl), __riscv_vfabs(llr1, vl), vl); + v = __riscv_vfsgnjx(__riscv_vfsgnj(v, llr0, vl), llr1, vl); + __riscv_vse32(dst_llr_ptr, v, vl); + } + + --loop_stage; + stage_size >>= 1; + } + + // for stages < 3 vectors are too small!. + llr_odd_stages(llrs, stage, loop_stage + 1, frame_size, row); +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32f_8u_polarbutterfly_32f_rvvseg(float* llrs, + unsigned char* u, + const int frame_exp, + const int stage, + const int u_num, + const int row) +{ + const int frame_size = 0x01 << frame_exp; + if (row % 2) { // for odd rows just do the only necessary calculation and return. + const float* next_llrs = llrs + frame_size + row; + *(llrs + row) = llr_even(*(next_llrs - 1), *next_llrs, u[u_num - 1]); + return; + } + + const int max_stage_depth = calculate_max_stage_depth_for_row(frame_exp, row); + if (max_stage_depth < 3) { // vectorized version needs larger vectors. + volk_32f_8u_polarbutterfly_32f_generic(llrs, u, frame_exp, stage, u_num, row); + return; + } + + int loop_stage = max_stage_depth; + int stage_size = 0x01 << loop_stage; + + float* src_llr_ptr; + float* dst_llr_ptr; + + if (row) { // not necessary for ZERO row. == first bit to be decoded. + // first do bit combination for all stages + // effectively encode some decoded bits again. + unsigned char* u_target = u + frame_size; + unsigned char* u_temp = u + 2 * frame_size; + memcpy(u_temp, u + u_num - stage_size, sizeof(unsigned char) * stage_size); + + volk_8u_x2_encodeframepolar_8u_rvv(u_target, u_temp, stage_size); + + src_llr_ptr = llrs + (max_stage_depth + 1) * frame_size + row - stage_size; + dst_llr_ptr = llrs + max_stage_depth * frame_size + row; + + size_t n = stage_size; + for (size_t vl; n > 0; + n -= vl, u_target += vl, src_llr_ptr += vl * 2, dst_llr_ptr += vl) { + vl = __riscv_vsetvl_e32m1(n); + vint8mf4_t v = __riscv_vle8_v_i8mf4((int8_t*)u_target, vl); + vfloat32m1x2_t llr = __riscv_vlseg2e32_v_f32m1x2(src_llr_ptr, vl); + vfloat32m1_t llr0 = __riscv_vget_f32m1(llr, 0); + vfloat32m1_t llr1 = __riscv_vget_f32m1(llr, 1); + llr0 = __riscv_vfneg_mu(__riscv_vmslt(v, 0, vl), llr0, llr0, vl); + llr0 = __riscv_vfadd(llr0, llr1, vl); + __riscv_vse32(dst_llr_ptr, llr0, vl); + } + + --loop_stage; + stage_size >>= 1; + } + + const int min_stage = stage > 2 ? 
stage : 2; + + while (min_stage < loop_stage) { + dst_llr_ptr = llrs + loop_stage * frame_size + row; + src_llr_ptr = dst_llr_ptr + frame_size; + + size_t n = stage_size; + for (size_t vl; n > 0; n -= vl, src_llr_ptr += vl * 2, dst_llr_ptr += vl) { + vl = __riscv_vsetvl_e32m1(n); + vfloat32m1x2_t llr = __riscv_vlseg2e32_v_f32m1x2(src_llr_ptr, vl); + vfloat32m1_t llr0 = __riscv_vget_f32m1(llr, 0); + vfloat32m1_t llr1 = __riscv_vget_f32m1(llr, 1); + vfloat32m1_t v = + __riscv_vfmin(__riscv_vfabs(llr0, vl), __riscv_vfabs(llr1, vl), vl); + v = __riscv_vfsgnjx(__riscv_vfsgnj(v, llr0, vl), llr1, vl); + __riscv_vse32(dst_llr_ptr, v, vl); + } + + --loop_stage; + stage_size >>= 1; + } + + // for stages < 3 vectors are too small!. + llr_odd_stages(llrs, stage, loop_stage + 1, frame_size, row); +} +#endif /* LV_HAVE_RVVSEG */ + #endif /* VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLY_32F_H_ */ diff --git a/kernels/volk/volk_32f_8u_polarbutterflypuppet_32f.h b/kernels/volk/volk_32f_8u_polarbutterflypuppet_32f.h index c97da33d..6ebcd22e 100644 --- a/kernels/volk/volk_32f_8u_polarbutterflypuppet_32f.h +++ b/kernels/volk/volk_32f_8u_polarbutterflypuppet_32f.h @@ -162,5 +162,62 @@ static inline void volk_32f_8u_polarbutterflypuppet_32f_u_avx2(float* llrs, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +static inline void volk_32f_8u_polarbutterflypuppet_32f_rvv(float* llrs, + const float* input, + unsigned char* u, + const int elements) +{ + (void)input; // suppress unused parameter warning + + if (elements < 2) { + return; + } + + unsigned int frame_size = maximum_frame_size(elements); + unsigned int frame_exp = log2_of_power_of_2(frame_size); + + sanitize_bytes(u, elements); + clean_up_intermediate_values(llrs, u, frame_size, elements); + generate_error_free_input_vector(llrs + frame_exp * frame_size, u, frame_size); + + unsigned int u_num = 0; + for (; u_num < frame_size; u_num++) { + volk_32f_8u_polarbutterfly_32f_rvv(llrs, u, frame_exp, 0, u_num, u_num); + u[u_num] = llrs[u_num] > 0 ? 0 : 1; + } + + clean_up_intermediate_values(llrs, u, frame_size, elements); +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVVSEG +static inline void volk_32f_8u_polarbutterflypuppet_32f_rvvseg(float* llrs, + const float* input, + unsigned char* u, + const int elements) +{ + (void)input; // suppress unused parameter warning + + if (elements < 2) { + return; + } + + unsigned int frame_size = maximum_frame_size(elements); + unsigned int frame_exp = log2_of_power_of_2(frame_size); + + sanitize_bytes(u, elements); + clean_up_intermediate_values(llrs, u, frame_size, elements); + generate_error_free_input_vector(llrs + frame_exp * frame_size, u, frame_size); + + unsigned int u_num = 0; + for (; u_num < frame_size; u_num++) { + volk_32f_8u_polarbutterfly_32f_rvvseg(llrs, u, frame_exp, 0, u_num, u_num); + u[u_num] = llrs[u_num] > 0 ? 
0 : 1; + } + + clean_up_intermediate_values(llrs, u, frame_size, elements); +} +#endif /* LV_HAVE_RVVSEG */ #endif /* VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLYPUPPET_32F_H_ */ diff --git a/kernels/volk/volk_32f_accumulator_s32f.h b/kernels/volk/volk_32f_accumulator_s32f.h index 1cd8568e..7e9a81f7 100644 --- a/kernels/volk/volk_32f_accumulator_s32f.h +++ b/kernels/volk/volk_32f_accumulator_s32f.h @@ -232,4 +232,26 @@ static inline void volk_32f_accumulator_s32f_generic(float* result, } #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32f_accumulator_s32f_rvv(float* result, + const float* inputBuffer, + unsigned int num_points) +{ + vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputBuffer, vl); + vsum = __riscv_vfadd_tu(vsum, vsum, v, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t v = RISCV_SHRINK8(vfadd, f, 32, vsum); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl)); +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */ diff --git a/kernels/volk/volk_32f_acos_32f.h b/kernels/volk/volk_32f_acos_32f.h index 5cf0d693..4331987c 100644 --- a/kernels/volk/volk_32f_acos_32f.h +++ b/kernels/volk/volk_32f_acos_32f.h @@ -102,13 +102,15 @@ static inline void volk_32f_acos_32f_a_avx2_fma(float* bVector, x = _mm256_add_ps( z, _mm256_and_ps(_mm256_sub_ps(_mm256_div_ps(fones, z), z), condition)); - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm256_add_ps(x, _mm256_sqrt_ps(_mm256_fmadd_ps(x, x, fones))); + } x = _mm256_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm256_fmadd_ps( y, _mm256_mul_ps(x, x), _mm256_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm256_mul_ps(y, _mm256_mul_ps(x, ffours)); condition = _mm256_cmp_ps(z, fones, _CMP_GT_OS); @@ -171,14 +173,16 @@ volk_32f_acos_32f_a_avx(float* bVector, const float* aVector, unsigned int num_p x = _mm256_add_ps( z, _mm256_and_ps(_mm256_sub_ps(_mm256_div_ps(fones, z), z), condition)); - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm256_add_ps(x, _mm256_sqrt_ps(_mm256_add_ps(fones, _mm256_mul_ps(x, x)))); + } x = _mm256_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm256_add_ps(_mm256_mul_ps(y, _mm256_mul_ps(x, x)), _mm256_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm256_mul_ps(y, _mm256_mul_ps(x, ffours)); condition = _mm256_cmp_ps(z, fones, _CMP_GT_OS); @@ -240,13 +244,15 @@ volk_32f_acos_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int nu condition = _mm_cmplt_ps(z, fones); x = _mm_add_ps(z, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition)); - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x)))); + } x = _mm_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm_mul_ps(y, _mm_mul_ps(x, ffours)); condition = _mm_cmpgt_ps(z, fones); @@ -315,13 +321,15 @@ static inline void volk_32f_acos_32f_u_avx2_fma(float* bVector, x = _mm256_add_ps( z, _mm256_and_ps(_mm256_sub_ps(_mm256_div_ps(fones, z), z), condition)); - for 
(i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm256_add_ps(x, _mm256_sqrt_ps(_mm256_fmadd_ps(x, x, fones))); + } x = _mm256_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm256_fmadd_ps( y, _mm256_mul_ps(x, x), _mm256_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm256_mul_ps(y, _mm256_mul_ps(x, ffours)); condition = _mm256_cmp_ps(z, fones, _CMP_GT_OS); @@ -384,14 +392,16 @@ volk_32f_acos_32f_u_avx(float* bVector, const float* aVector, unsigned int num_p x = _mm256_add_ps( z, _mm256_and_ps(_mm256_sub_ps(_mm256_div_ps(fones, z), z), condition)); - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm256_add_ps(x, _mm256_sqrt_ps(_mm256_add_ps(fones, _mm256_mul_ps(x, x)))); + } x = _mm256_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm256_add_ps(_mm256_mul_ps(y, _mm256_mul_ps(x, x)), _mm256_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm256_mul_ps(y, _mm256_mul_ps(x, ffours)); condition = _mm256_cmp_ps(z, fones, _CMP_GT_OS); @@ -453,14 +463,16 @@ volk_32f_acos_32f_u_sse4_1(float* bVector, const float* aVector, unsigned int nu condition = _mm_cmplt_ps(z, fones); x = _mm_add_ps(z, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), condition)); - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x)))); + } x = _mm_div_ps(fones, x); y = fzeroes; - for (j = ACOS_TERMS - 1; j >= 0; j--) + for (j = ACOS_TERMS - 1; j >= 0; j--) { y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), _mm_set1_ps(pow(-1, j) / (2 * j + 1))); + } y = _mm_mul_ps(y, _mm_mul_ps(x, ffours)); condition = _mm_cmpgt_ps(z, fones); @@ -501,4 +513,72 @@ volk_32f_acos_32f_generic(float* bVector, const float* aVector, unsigned int num } #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_acos_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t cpi = __riscv_vfmv_v_f_f32m2(3.1415927f, vlmax); + const vfloat32m2_t cpio2 = __riscv_vfmv_v_f_f32m2(1.5707964f, vlmax); + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf2 = __riscv_vfmv_v_f_f32m2(2.0f, vlmax); + const vfloat32m2_t cf4 = __riscv_vfmv_v_f_f32m2(4.0f, vlmax); + +#if ACOS_TERMS == 2 + const vfloat32m2_t cfm1o3 = __riscv_vfmv_v_f_f32m2(-1 / 3.0f, vlmax); +#elif ACOS_TERMS == 3 + const vfloat32m2_t cf1o5 = __riscv_vfmv_v_f_f32m2(1 / 5.0f, vlmax); +#elif ACOS_TERMS == 4 + const vfloat32m2_t cfm1o7 = __riscv_vfmv_v_f_f32m2(-1 / 7.0f, vlmax); +#endif + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t a = + __riscv_vfdiv(__riscv_vfsqrt(__riscv_vfmsac(cf1, v, v, vl), vl), v, vl); + vfloat32m2_t z = __riscv_vfabs(a, vl); + vfloat32m2_t x = __riscv_vfdiv_mu(__riscv_vmflt(z, cf1, vl), z, cf1, z, vl); + x = __riscv_vfadd(x, __riscv_vfsqrt(__riscv_vfmadd(x, x, cf1, vl), vl), vl); + x = __riscv_vfadd(x, __riscv_vfsqrt(__riscv_vfmadd(x, x, cf1, vl), vl), vl); + x = __riscv_vfdiv(cf1, x, vl); + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + +#if ACOS_TERMS < 1 + vfloat32m2_t y = __riscv_vfmv_v_f_f32m2(0, vl); +#elif ACOS_TERMS == 1 + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ACOS_TERMS == 2 + vfloat32m2_t y = cfm1o3; + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ACOS_TERMS == 
3 + vfloat32m2_t y = cf1o5; + y = __riscv_vfmadd(y, xx, cfm1o3, vl); + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ACOS_TERMS == 4 + vfloat32m2_t y = cfm1o7; + y = __riscv_vfmadd(y, xx, cf1o5, vl); + y = __riscv_vfmadd(y, xx, cfm1o3, vl); + y = __riscv_vfmadd(y, xx, cf1, vl); +#else +#error "ACOS_TERMS > 4 not supported by volk_32f_acos_32f_rvv" +#endif + y = __riscv_vfmul(y, __riscv_vfmul(x, cf4, vl), vl); + y = __riscv_vfadd_mu( + __riscv_vmfgt(z, cf1, vl), y, y, __riscv_vfnmsub(y, cf2, cpio2, vl), vl); + + vfloat32m2_t acosine; + acosine = __riscv_vfneg_mu(RISCV_VMFLTZ(32m2, a, vl), y, y, vl); + acosine = __riscv_vfadd_mu(RISCV_VMFLTZ(32m2, v, vl), acosine, acosine, cpi, vl); + + __riscv_vse32(bVector, acosine, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_acos_32f_u_H */ diff --git a/kernels/volk/volk_32f_asin_32f.h b/kernels/volk/volk_32f_asin_32f.h index 09377163..1914c39e 100644 --- a/kernels/volk/volk_32f_asin_32f.h +++ b/kernels/volk/volk_32f_asin_32f.h @@ -486,4 +486,70 @@ volk_32f_asin_32f_generic(float* bVector, const float* aVector, unsigned int num } #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_asin_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t cpio2 = __riscv_vfmv_v_f_f32m2(1.5707964f, vlmax); + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf2 = __riscv_vfmv_v_f_f32m2(2.0f, vlmax); + const vfloat32m2_t cf4 = __riscv_vfmv_v_f_f32m2(4.0f, vlmax); + +#if ASIN_TERMS == 2 + const vfloat32m2_t cfm1o3 = __riscv_vfmv_v_f_f32m2(-1 / 3.0f, vlmax); +#elif ASIN_TERMS == 3 + const vfloat32m2_t cf1o5 = __riscv_vfmv_v_f_f32m2(1 / 5.0f, vlmax); +#elif ASIN_TERMS == 4 + const vfloat32m2_t cfm1o7 = __riscv_vfmv_v_f_f32m2(-1 / 7.0f, vlmax); +#endif + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t a = + __riscv_vfdiv(__riscv_vfsqrt(__riscv_vfmsac(cf1, v, v, vl), vl), v, vl); + vfloat32m2_t z = __riscv_vfabs(a, vl); + vfloat32m2_t x = __riscv_vfdiv_mu(__riscv_vmflt(z, cf1, vl), z, cf1, z, vl); + x = __riscv_vfadd(x, __riscv_vfsqrt(__riscv_vfmadd(x, x, cf1, vl), vl), vl); + x = __riscv_vfadd(x, __riscv_vfsqrt(__riscv_vfmadd(x, x, cf1, vl), vl), vl); + x = __riscv_vfdiv(cf1, x, vl); + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + +#if ASIN_TERMS < 1 + vfloat32m2_t y = __riscv_vfmv_v_f_f32m2(0, vl); +#elif ASIN_TERMS == 1 + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ASIN_TERMS == 2 + vfloat32m2_t y = cfm1o3; + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ASIN_TERMS == 3 + vfloat32m2_t y = cf1o5; + y = __riscv_vfmadd(y, xx, cfm1o3, vl); + y = __riscv_vfmadd(y, xx, cf1, vl); +#elif ASIN_TERMS == 4 + vfloat32m2_t y = cfm1o7; + y = __riscv_vfmadd(y, xx, cf1o5, vl); + y = __riscv_vfmadd(y, xx, cfm1o3, vl); + y = __riscv_vfmadd(y, xx, cf1, vl); +#else +#error "ASIN_TERMS > 4 not supported by volk_32f_asin_32f_rvv" +#endif + y = __riscv_vfmul(y, __riscv_vfmul(x, cf4, vl), vl); + y = __riscv_vfadd_mu( + __riscv_vmfgt(z, cf1, vl), y, y, __riscv_vfnmsub(y, cf2, cpio2, vl), vl); + + vfloat32m2_t asine; + asine = __riscv_vfneg_mu(RISCV_VMFLTZ(32m2, a, vl), y, y, vl); + + __riscv_vse32(bVector, asine, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_asin_32f_u_H */ diff --git a/kernels/volk/volk_32f_atan_32f.h b/kernels/volk/volk_32f_atan_32f.h index 
dc5987cb..300f46ca 100644 --- a/kernels/volk/volk_32f_atan_32f.h +++ b/kernels/volk/volk_32f_atan_32f.h @@ -293,4 +293,46 @@ volk_32f_atan_32f_generic(float* out, const float* in, unsigned int num_points) } #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_atan_32f_rvv(float* out, const float* in, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t cpio2 = __riscv_vfmv_v_f_f32m2(1.5707964f, vlmax); + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(+0x1.ffffeap-1f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-0x1.55437p-2f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(+0x1.972be6p-3f, vlmax); + const vfloat32m2_t c7 = __riscv_vfmv_v_f_f32m2(-0x1.1436ap-3f, vlmax); + const vfloat32m2_t c9 = __riscv_vfmv_v_f_f32m2(+0x1.5785aap-4f, vlmax); + const vfloat32m2_t c11 = __riscv_vfmv_v_f_f32m2(-0x1.2f3004p-5f, vlmax); + const vfloat32m2_t c13 = __riscv_vfmv_v_f_f32m2(+0x1.01a37cp-7f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(in, vl); + vbool16_t mswap = __riscv_vmfgt(__riscv_vfabs(v, vl), cf1, vl); + vfloat32m2_t x = __riscv_vfdiv_mu(mswap, v, cf1, v, vl); + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + vfloat32m2_t p = c13; + p = __riscv_vfmadd(p, xx, c11, vl); + p = __riscv_vfmadd(p, xx, c9, vl); + p = __riscv_vfmadd(p, xx, c7, vl); + p = __riscv_vfmadd(p, xx, c5, vl); + p = __riscv_vfmadd(p, xx, c3, vl); + p = __riscv_vfmadd(p, xx, c1, vl); + p = __riscv_vfmul(p, x, vl); + + vfloat32m2_t t = __riscv_vfsub(__riscv_vfsgnj(cpio2, x, vl), p, vl); + p = __riscv_vmerge(p, t, mswap, vl); + + __riscv_vse32(out, p, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_atan_32f_u_H */ diff --git a/kernels/volk/volk_32f_binary_slicer_32i.h b/kernels/volk/volk_32f_binary_slicer_32i.h index 7606145b..861ef478 100644 --- a/kernels/volk/volk_32f_binary_slicer_32i.h +++ b/kernels/volk/volk_32f_binary_slicer_32i.h @@ -261,5 +261,21 @@ static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_binary_slicer_32i_rvv(int* cVector, + const float* aVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t*)aVector, vl); + v = __riscv_vsrl(__riscv_vnot(v, vl), 31, vl); + __riscv_vse32((uint32_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_binary_slicer_32i_H */ diff --git a/kernels/volk/volk_32f_binary_slicer_8i.h b/kernels/volk/volk_32f_binary_slicer_8i.h index c6929db4..9623ae90 100644 --- a/kernels/volk/volk_32f_binary_slicer_8i.h +++ b/kernels/volk/volk_32f_binary_slicer_8i.h @@ -500,5 +500,22 @@ static inline void volk_32f_binary_slicer_8i_neon(int8_t* cVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_binary_slicer_8i_rvv(int8_t* cVector, + const float* aVector, + unsigned int num_points) +{ + size_t n = num_points; + vint8m2_t v0 = __riscv_vmv_v_x_i8m2(1, __riscv_vsetvlmax_e8m2()); + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + vint8m2_t vn = __riscv_vmerge(v0, 0, __riscv_vmflt(v, 0, 
vl), vl); + __riscv_vse8(cVector, vn, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_binary_slicer_8i_H */ diff --git a/kernels/volk/volk_32f_convert_64f.h b/kernels/volk/volk_32f_convert_64f.h index 93d1c611..5e907d39 100644 --- a/kernels/volk/volk_32f_convert_64f.h +++ b/kernels/volk/volk_32f_convert_64f.h @@ -230,5 +230,20 @@ static inline void volk_32f_convert_64f_a_sse2(double* outputVector, } #endif /* LV_HAVE_SSE2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_convert_64f_rvv(double* outputVector, + const float* inputVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t v = __riscv_vle32_v_f32m4(inputVector, vl); + __riscv_vse64(outputVector, __riscv_vfwcvt_f(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_convert_64f_a_H */ diff --git a/kernels/volk/volk_32f_cos_32f.h b/kernels/volk/volk_32f_cos_32f.h index 37785df0..854dd00e 100644 --- a/kernels/volk/volk_32f_cos_32f.h +++ b/kernels/volk/volk_32f_cos_32f.h @@ -127,8 +127,9 @@ static inline void volk_32f_cos_32f_a_avx512f(float* cosVector, cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm512_mul_ps(s, _mm512_sub_ps(ffours, s)); + } s = _mm512_div_ps(s, ftwos); sine = _mm512_sqrt_ps(_mm512_mul_ps(_mm512_sub_ps(ftwos, s), s)); @@ -224,8 +225,9 @@ volk_32f_cos_32f_a_avx2_fma(float* bVector, const float* aVector, unsigned int n cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm256_mul_ps(s, _mm256_sub_ps(ffours, s)); + } s = _mm256_div_ps(s, ftwos); sine = _mm256_sqrt_ps(_mm256_mul_ps(_mm256_sub_ps(ftwos, s), s)); @@ -335,8 +337,9 @@ volk_32f_cos_32f_a_avx2(float* bVector, const float* aVector, unsigned int num_p cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm256_mul_ps(s, _mm256_sub_ps(ffours, s)); + } s = _mm256_div_ps(s, ftwos); sine = _mm256_sqrt_ps(_mm256_mul_ps(_mm256_sub_ps(ftwos, s), s)); @@ -442,8 +445,9 @@ volk_32f_cos_32f_a_sse4_1(float* bVector, const float* aVector, unsigned int num cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm_mul_ps(s, _mm_sub_ps(ffours, s)); + } s = _mm_div_ps(s, ftwos); sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s)); @@ -546,8 +550,9 @@ static inline void volk_32f_cos_32f_u_avx512f(float* cosVector, cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm512_mul_ps(s, _mm512_sub_ps(ffours, s)); + } s = _mm512_div_ps(s, ftwos); sine = _mm512_sqrt_ps(_mm512_mul_ps(_mm512_sub_ps(ftwos, s), s)); @@ -644,8 +649,9 @@ volk_32f_cos_32f_u_avx2_fma(float* bVector, const float* aVector, unsigned int n cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm256_mul_ps(s, _mm256_sub_ps(ffours, s)); + } s = _mm256_div_ps(s, ftwos); sine = _mm256_sqrt_ps(_mm256_mul_ps(_mm256_sub_ps(ftwos, s), s)); @@ -755,8 +761,9 @@ volk_32f_cos_32f_u_avx2(float* bVector, const float* aVector, unsigned int num_p cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm256_mul_ps(s, _mm256_sub_ps(ffours, s)); + } s = _mm256_div_ps(s, ftwos); sine = _mm256_sqrt_ps(_mm256_mul_ps(_mm256_sub_ps(ftwos, s), s)); @@ -995,5 +1002,65 @@ volk_32f_cos_32f_neon(float* bVector, const float* aVector, unsigned int num_poi #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_cos_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = 
__riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t c4oPi = __riscv_vfmv_v_f_f32m2(1.2732395f, vlmax); + const vfloat32m2_t cPio4a = __riscv_vfmv_v_f_f32m2(0.7853982f, vlmax); + const vfloat32m2_t cPio4b = __riscv_vfmv_v_f_f32m2(7.946627e-09f, vlmax); + const vfloat32m2_t cPio4c = __riscv_vfmv_v_f_f32m2(3.061617e-17f, vlmax); + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf4 = __riscv_vfmv_v_f_f32m2(4.0f, vlmax); + + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(0.0833333333f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(0.0027777778f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(4.9603175e-05f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(5.5114638e-07f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t s = __riscv_vfabs(v, vl); + vint32m2_t q = __riscv_vfcvt_x(__riscv_vfmul(s, c4oPi, vl), vl); + vfloat32m2_t r = __riscv_vfcvt_f(__riscv_vadd(q, __riscv_vand(q, 1, vl), vl), vl); + + s = __riscv_vfnmsac(s, cPio4a, r, vl); + s = __riscv_vfnmsac(s, cPio4b, r, vl); + s = __riscv_vfnmsac(s, cPio4c, r, vl); + + s = __riscv_vfmul(s, 1 / 8.0f, vl); + s = __riscv_vfmul(s, s, vl); + vfloat32m2_t t = s; + s = __riscv_vfmsub(s, c5, c4, vl); + s = __riscv_vfmadd(s, t, c3, vl); + s = __riscv_vfmsub(s, t, c2, vl); + s = __riscv_vfmadd(s, t, cf1, vl); + s = __riscv_vfmul(s, t, vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, 1 / 2.0f, vl); + + vfloat32m2_t sine = + __riscv_vfsqrt(__riscv_vfmul(__riscv_vfrsub(s, 2.0f, vl), s, vl), vl); + vfloat32m2_t cosine = __riscv_vfsub(cf1, s, vl); + + vbool16_t m1 = __riscv_vmsne(__riscv_vand(__riscv_vadd(q, 1, vl), 2, vl), 0, vl); + vbool16_t m2 = __riscv_vmsne(__riscv_vand(__riscv_vadd(q, 2, vl), 4, vl), 0, vl); + + cosine = __riscv_vmerge(cosine, sine, m1, vl); + cosine = __riscv_vfneg_mu(m2, cosine, cosine, vl); + + __riscv_vse32(bVector, cosine, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_cos_32f_u_H */ diff --git a/kernels/volk/volk_32f_exp_32f.h b/kernels/volk/volk_32f_exp_32f.h index 13d21201..85571dbc 100644 --- a/kernels/volk/volk_32f_exp_32f.h +++ b/kernels/volk/volk_32f_exp_32f.h @@ -266,4 +266,58 @@ volk_32f_exp_32f_generic(float* bVector, const float* aVector, unsigned int num_ #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_exp_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t exp_hi = __riscv_vfmv_v_f_f32m2(88.376259f, vlmax); + const vfloat32m2_t exp_lo = __riscv_vfmv_v_f_f32m2(-88.376259f, vlmax); + const vfloat32m2_t log2EF = __riscv_vfmv_v_f_f32m2(1.442695f, vlmax); + const vfloat32m2_t exp_C1 = __riscv_vfmv_v_f_f32m2(-0.6933594f, vlmax); + const vfloat32m2_t exp_C2 = __riscv_vfmv_v_f_f32m2(0.000212194f, vlmax); + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf1o2 = __riscv_vfmv_v_f_f32m2(0.5f, vlmax); + + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(1.9875691500e-4, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(1.3981999507e-3, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(8.3334519073e-3, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(4.1665795894e-2, vlmax); 
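    /* The fixed constants here appear to follow the usual expf() range
     * reduction: exp(x) = 2^n * exp(r) with n = round(x * log2(e)) and
     * r = x - n*ln(2), where ln(2) is removed in two pieces (exp_C1 + exp_C2)
     * to limit rounding error; exp(r) is then evaluated as 1 + r + r^2*P(r)
     * using the degree-5 polynomial c0..c5, and the final 2^n scaling is
     * applied by adding n to the IEEE-754 exponent bits (pow2n below). */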
+ const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(1.6666665459e-1, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(5.0000001201e-1, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + v = __riscv_vfmin(v, exp_hi, vl); + v = __riscv_vfmax(v, exp_lo, vl); + vfloat32m2_t fx = __riscv_vfmadd(v, log2EF, cf1o2, vl); + + vfloat32m2_t rtz = __riscv_vfcvt_f(__riscv_vfcvt_rtz_x(fx, vl), vl); + fx = __riscv_vfsub_mu(__riscv_vmfgt(rtz, fx, vl), rtz, rtz, cf1, vl); + v = __riscv_vfmacc(v, fx, exp_C1, vl); + v = __riscv_vfmacc(v, fx, exp_C2, vl); + vfloat32m2_t vv = __riscv_vfmul(v, v, vl); + + vfloat32m2_t y = c0; + y = __riscv_vfmadd(y, v, c1, vl); + y = __riscv_vfmadd(y, v, c2, vl); + y = __riscv_vfmadd(y, v, c3, vl); + y = __riscv_vfmadd(y, v, c4, vl); + y = __riscv_vfmadd(y, v, c5, vl); + y = __riscv_vfmadd(y, vv, v, vl); + y = __riscv_vfadd(y, cf1, vl); + + vfloat32m2_t pow2n = __riscv_vreinterpret_f32m2( + __riscv_vsll(__riscv_vadd(__riscv_vfcvt_rtz_x(fx, vl), 0x7f, vl), 23, vl)); + + __riscv_vse32(bVector, __riscv_vfmul(y, pow2n, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_exp_32f_u_H */ diff --git a/kernels/volk/volk_32f_expfast_32f.h b/kernels/volk/volk_32f_expfast_32f.h index 7dfbaacb..3b65968a 100644 --- a/kernels/volk/volk_32f_expfast_32f.h +++ b/kernels/volk/volk_32f_expfast_32f.h @@ -301,4 +301,25 @@ static inline void volk_32f_expfast_32f_generic(float* bVector, } #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_expfast_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m8(); + const vfloat32m8_t ca = __riscv_vfmv_v_f_f32m8(A / Mln2, vlmax); + const vfloat32m8_t cb = __riscv_vfmv_v_f_f32m8(B - C, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + v = __riscv_vfmadd(v, ca, cb, vl); + v = __riscv_vreinterpret_f32m8(__riscv_vfcvt_x(v, vl)); + __riscv_vse32(bVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_expfast_32f_u_H */ diff --git a/kernels/volk/volk_32f_index_max_16u.h b/kernels/volk/volk_32f_index_max_16u.h index 2aad087e..3e7c0fb9 100644 --- a/kernels/volk/volk_32f_index_max_16u.h +++ b/kernels/volk/volk_32f_index_max_16u.h @@ -359,4 +359,32 @@ volk_32f_index_max_16u_u_avx(uint16_t* target, const float* src0, uint32_t num_p #endif /*LV_HAVE_AVX*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_index_max_16u_rvv(uint16_t* target, const float* src0, uint32_t num_points) +{ + vfloat32m8_t vmax = __riscv_vfmv_v_f_f32m8(-FLT_MAX, __riscv_vsetvlmax_e32m8()); + vuint16m4_t vmaxi = __riscv_vmv_v_x_u16m4(0, __riscv_vsetvlmax_e16m4()); + vuint16m4_t vidx = __riscv_vid_v_u16m4(__riscv_vsetvlmax_e16m4()); + size_t n = (num_points > USHRT_MAX) ? 
USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(src0, vl); + vbool4_t m = __riscv_vmfgt(v, vmax, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m8(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK8(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(-FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool4_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_32f_index_max_16u_u_H*/ diff --git a/kernels/volk/volk_32f_index_max_32u.h b/kernels/volk/volk_32f_index_max_32u.h index 86dad0d1..0bf071fc 100644 --- a/kernels/volk/volk_32f_index_max_32u.h +++ b/kernels/volk/volk_32f_index_max_32u.h @@ -542,4 +542,32 @@ volk_32f_index_max_32u_u_sse(uint32_t* target, const float* src0, uint32_t num_p #endif /*LV_HAVE_SSE*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_index_max_32u_rvv(uint32_t* target, const float* src0, uint32_t num_points) +{ + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(-FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmaxi = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t v = __riscv_vle32_v_f32m4(src0, vl); + vbool8_t m = __riscv_vmfgt(v, vmax, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK4(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(-FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_32f_index_max_32u_u_H*/ diff --git a/kernels/volk/volk_32f_index_min_16u.h b/kernels/volk/volk_32f_index_min_16u.h index 000ecafc..5e1f0aa1 100644 --- a/kernels/volk/volk_32f_index_min_16u.h +++ b/kernels/volk/volk_32f_index_min_16u.h @@ -346,4 +346,32 @@ volk_32f_index_min_16u_u_avx(uint16_t* target, const float* source, uint32_t num #endif /*LV_HAVE_AVX*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_index_min_16u_rvv(uint16_t* target, const float* src0, uint32_t num_points) +{ + vfloat32m8_t vmin = __riscv_vfmv_v_f_f32m8(FLT_MAX, __riscv_vsetvlmax_e32m8()); + vuint16m4_t vmini = __riscv_vmv_v_x_u16m4(0, __riscv_vsetvlmax_e16m4()); + vuint16m4_t vidx = __riscv_vid_v_u16m4(__riscv_vsetvlmax_e16m4()); + size_t n = (num_points > USHRT_MAX) ? 
USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(src0, vl); + vbool4_t m = __riscv_vmflt(v, vmin, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = __riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m8(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK8(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool4_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_32f_index_min_16u_u_H*/ diff --git a/kernels/volk/volk_32f_index_min_32u.h b/kernels/volk/volk_32f_index_min_32u.h index 0c8bf8c0..7d01fbb4 100644 --- a/kernels/volk/volk_32f_index_min_32u.h +++ b/kernels/volk/volk_32f_index_min_32u.h @@ -508,4 +508,32 @@ volk_32f_index_min_32u_u_sse(uint32_t* target, const float* source, uint32_t num #endif /*LV_HAVE_SSE*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32f_index_min_32u_rvv(uint32_t* target, const float* src0, uint32_t num_points) +{ + vfloat32m4_t vmin = __riscv_vfmv_v_f_f32m4(FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmini = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t v = __riscv_vle32_v_f32m4(src0, vl); + vbool8_t m = __riscv_vmflt(v, vmin, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = __riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK4(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_32f_index_min_32u_u_H*/ diff --git a/kernels/volk/volk_32f_invsqrt_32f.h b/kernels/volk/volk_32f_invsqrt_32f.h index e91b6c7c..838c9927 100644 --- a/kernels/volk/volk_32f_invsqrt_32f.h +++ b/kernels/volk/volk_32f_invsqrt_32f.h @@ -97,8 +97,9 @@ volk_32f_invsqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int nu } number = eighthPoints * 8; - for (; number < num_points; number++) + for (; number < num_points; number++) { *cPtr++ = Q_rsqrt(*aPtr++); + } } #endif /* LV_HAVE_AVX */ @@ -156,8 +157,9 @@ volk_32f_invsqrt_32f_neon(float* cVector, const float* aVector, unsigned int num cPtr += 4; } - for (number = quarter_points * 4; number < num_points; number++) + for (number = quarter_points * 4; number < num_points; number++) { *cPtr++ = Q_rsqrt(*aPtr++); + } } #endif /* LV_HAVE_NEON */ @@ -198,9 +200,25 @@ volk_32f_invsqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int nu } number = eighthPoints * 8; - for (; number < num_points; number++) + for (; number < num_points; number++) { *cPtr++ = Q_rsqrt(*aPtr++); + } } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_invsqrt_32f_rvv(float* cVector, const float* aVector, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + 
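    /* __riscv_vfrsqrt7 is the RVV reciprocal-square-root *estimate*
     * instruction (roughly 7 bits of precision per the V spec), so this
     * kernel trades accuracy for speed in the same spirit as the Q_rsqrt
     * tail loops of the other implementations above. */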
vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + __riscv_vse32(cVector, __riscv_vfrsqrt7(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */ diff --git a/kernels/volk/volk_32f_log2_32f.h b/kernels/volk/volk_32f_log2_32f.h index 0443e56e..47a7cbe3 100644 --- a/kernels/volk/volk_32f_log2_32f.h +++ b/kernels/volk/volk_32f_log2_32f.h @@ -95,8 +95,9 @@ volk_32f_log2_32f_generic(float* bVector, const float* aVector, unsigned int num const float* aPtr = aVector; unsigned int number = 0; - for (number = 0; number < num_points; number++) + for (number = 0; number < num_points; number++) { *bPtr++ = log2f_non_ieee(*aPtr++); + } } #endif /* LV_HAVE_GENERIC */ @@ -718,5 +719,73 @@ volk_32f_log2_32f_u_avx2(float* bVector, const float* aVector, unsigned int num_ #endif /* LV_HAVE_AVX2 for unaligned */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_log2_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + +#if LOG_POLY_DEGREE == 6 + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax); +#elif LOG_POLY_DEGREE == 5 + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax); +#elif LOG_POLY_DEGREE == 4 + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax); +#elif LOG_POLY_DEGREE == 3 + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax); +#else +#error +#endif + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1); + const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax); + const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t a = __riscv_vfabs(v, vl); + vfloat32m2_t exp = __riscv_vfcvt_f( + __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl), + vl); + vfloat32m2_t frac = __riscv_vreinterpret_f32m2( + __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl)); + + vfloat32m2_t mant = c0; + mant = __riscv_vfmadd(mant, frac, c1, vl); + mant = __riscv_vfmadd(mant, frac, c2, vl); +#if LOG_POLY_DEGREE >= 4 + mant = __riscv_vfmadd(mant, frac, c3, vl); +#if LOG_POLY_DEGREE >= 5 + mant = __riscv_vfmadd(mant, frac, 
c4, vl); +#if LOG_POLY_DEGREE >= 6 + mant = __riscv_vfmadd(mant, frac, c5, vl); +#endif +#endif +#endif + exp = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl); + + __riscv_vse32(bVector, exp, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_log2_32f_u_H */ diff --git a/kernels/volk/volk_32f_reciprocal_32f.h b/kernels/volk/volk_32f_reciprocal_32f.h index 37bd16a8..f44a9885 100644 --- a/kernels/volk/volk_32f_reciprocal_32f.h +++ b/kernels/volk/volk_32f_reciprocal_32f.h @@ -198,4 +198,19 @@ volk_32f_reciprocal_32f_u_avx512(float* out, const float* in, unsigned int num_p } #endif /* LV_HAVE_AVX512F */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_reciprocal_32f_rvv(float* out, const float* in, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(in, vl); + __riscv_vse32(out, __riscv_vfrdiv(v, 1.0f, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_reciprocal_32f_u_H */ diff --git a/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h b/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h index a6eb37c2..607bd6d8 100644 --- a/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h +++ b/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h @@ -335,4 +335,41 @@ static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector, #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_32f_fm_detect_32f_rvv(float* outputVector, + const float* inputVector, + const float bound, + float* saveValue, + unsigned int num_points) +{ + if (num_points < 1) + return; + + *outputVector = *inputVector - *saveValue; + if (*outputVector > bound) + *outputVector -= 2 * bound; + if (*outputVector < -bound) + *outputVector += 2 * bound; + ++inputVector; + ++outputVector; + + vfloat32m8_t v2bound = __riscv_vfmv_v_f_f32m8(bound * 2, __riscv_vsetvlmax_e32m8()); + + size_t n = num_points - 1; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(inputVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(inputVector - 1, vl); + vfloat32m8_t v = __riscv_vfsub(va, vb, vl); + v = __riscv_vfsub_mu(__riscv_vmfgt(v, bound, vl), v, v, v2bound, vl); + v = __riscv_vfadd_mu(__riscv_vmflt(v, -bound, vl), v, v, v2bound, vl); + __riscv_vse32(outputVector, v, vl); + } + + *saveValue = inputVector[-1]; +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */ diff --git a/kernels/volk/volk_32f_s32f_add_32f.h b/kernels/volk/volk_32f_s32f_add_32f.h index d7ae2aa1..e3301a7a 100644 --- a/kernels/volk/volk_32f_s32f_add_32f.h +++ b/kernels/volk/volk_32f_s32f_add_32f.h @@ -258,4 +258,21 @@ static inline void volk_32f_s32f_add_32f_u_orc(float* cVector, } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_add_32f_rvv(float* cVector, + const float* aVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + __riscv_vse32(cVector, __riscv_vfadd(v, scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_s32f_add_32f_a_H */ diff --git a/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h b/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h index 816f6092..368a987a 
100644 --- a/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h +++ b/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h @@ -52,6 +52,8 @@ #include #include +#include + #ifdef LV_HAVE_AVX #include @@ -458,4 +460,37 @@ volk_32f_s32f_calc_spectral_noise_floor_32f_u_avx(float* noiseFloorAmplitude, *noiseFloorAmplitude = localNoiseFloorAmplitude; } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_s32f_calc_spectral_noise_floor_32f_rvv(float* noiseFloorAmplitude, + const float* realDataPoints, + const float spectralExclusionValue, + const unsigned int num_points) +{ + float sum; + volk_32f_accumulator_s32f_rvv(&sum, realDataPoints, num_points); + float meanAmplitude = sum / num_points + spectralExclusionValue; + + vfloat32m8_t vbin = __riscv_vfmv_v_f_f32m8(meanAmplitude, __riscv_vsetvlmax_e32m8()); + vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8()); + size_t n = num_points, binCount = 0; + for (size_t vl; n > 0; n -= vl, realDataPoints += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(realDataPoints, vl); + vbool4_t m = __riscv_vmfle(v, vbin, vl); + binCount += __riscv_vcpop(m, vl); + vsum = __riscv_vfadd_tumu(m, vsum, vsum, v, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t v = RISCV_SHRINK8(vfadd, f, 32, vsum); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + sum = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl)); + + *noiseFloorAmplitude = binCount == 0 ? meanAmplitude : sum / binCount; +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H */ diff --git a/kernels/volk/volk_32f_s32f_clamppuppet_32f.h b/kernels/volk/volk_32f_s32f_clamppuppet_32f.h index 254bfdd5..b4a0e3af 100644 --- a/kernels/volk/volk_32f_s32f_clamppuppet_32f.h +++ b/kernels/volk/volk_32f_s32f_clamppuppet_32f.h @@ -62,4 +62,14 @@ static inline void volk_32f_s32f_clamppuppet_32f_u_sse4_1(float* out, } #endif +#ifdef LV_HAVE_RVV +static inline void volk_32f_s32f_clamppuppet_32f_rvv(float* out, + const float* in, + const float min, + unsigned int num_points) +{ + volk_32f_s32f_x2_clamp_32f_rvv(out, in, min, -min, num_points); +} +#endif + #endif /* INCLUDED_volk_32f_s32f_clamppuppet_32f_H */ diff --git a/kernels/volk/volk_32f_s32f_convert_16i.h b/kernels/volk/volk_32f_s32f_convert_16i.h index fe5a31b3..667e97f6 100644 --- a/kernels/volk/volk_32f_s32f_convert_16i.h +++ b/kernels/volk/volk_32f_s32f_convert_16i.h @@ -552,5 +552,22 @@ static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, } #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_convert_16i_rvv(int16_t* outputVector, + const float* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl); + v = __riscv_vfmul(v, scalar, vl); + __riscv_vse16(outputVector, __riscv_vfncvt_x(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */ diff --git a/kernels/volk/volk_32f_s32f_convert_32i.h b/kernels/volk/volk_32f_s32f_convert_32i.h index 0cd9dee8..b7b6fb1a 100644 --- a/kernels/volk/volk_32f_s32f_convert_32i.h +++ b/kernels/volk/volk_32f_s32f_convert_32i.h @@ -405,5 +405,22 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void 
volk_32f_s32f_convert_32i_rvv(int32_t* outputVector, + const float* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl); + v = __riscv_vfmul(v, scalar, vl); + __riscv_vse32(outputVector, __riscv_vfcvt_x(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */ diff --git a/kernels/volk/volk_32f_s32f_convert_8i.h b/kernels/volk/volk_32f_s32f_convert_8i.h index d47f95a0..a21ae7aa 100644 --- a/kernels/volk/volk_32f_s32f_convert_8i.h +++ b/kernels/volk/volk_32f_s32f_convert_8i.h @@ -437,5 +437,22 @@ static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_convert_8i_rvv(int8_t* outputVector, + const float* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl); + vint16m4_t vi = __riscv_vfncvt_x(__riscv_vfmul(v, scalar, vl), vl); + __riscv_vse8(outputVector, __riscv_vnclip(vi, 0, 0, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */ diff --git a/kernels/volk/volk_32f_s32f_convertpuppet_8u.h b/kernels/volk/volk_32f_s32f_convertpuppet_8u.h index 7f530c44..aa1258ba 100644 --- a/kernels/volk/volk_32f_s32f_convertpuppet_8u.h +++ b/kernels/volk/volk_32f_s32f_convertpuppet_8u.h @@ -102,4 +102,15 @@ static inline void volk_32f_s32f_convertpuppet_8u_a_sse(uint8_t* output, volk_32f_s32f_x2_convert_8u_a_sse(output, input, scale, 128.0, num_points); } #endif + +#ifdef LV_HAVE_RVV +static inline void volk_32f_s32f_convertpuppet_8u_rvv(uint8_t* output, + const float* input, + float scale, + unsigned int num_points) +{ + volk_32f_s32f_x2_convert_8u_rvv(output, input, scale, 128.0, num_points); +} +#endif + #endif diff --git a/kernels/volk/volk_32f_s32f_mod_rangepuppet_32f.h b/kernels/volk/volk_32f_s32f_mod_rangepuppet_32f.h index 3a178596..f4a7a2b0 100644 --- a/kernels/volk/volk_32f_s32f_mod_rangepuppet_32f.h +++ b/kernels/volk/volk_32f_s32f_mod_rangepuppet_32f.h @@ -86,4 +86,14 @@ static inline void volk_32f_s32f_mod_rangepuppet_32f_a_avx(float* output, output, input, bound - 3.131f, bound, num_points); } #endif +#ifdef LV_HAVE_RVV +static inline void volk_32f_s32f_mod_rangepuppet_32f_rvv(float* output, + const float* input, + float bound, + unsigned int num_points) +{ + volk_32f_s32f_s32f_mod_range_32f_rvv( + output, input, bound - 3.131f, bound, num_points); +} +#endif #endif diff --git a/kernels/volk/volk_32f_s32f_multiply_32f.h b/kernels/volk/volk_32f_s32f_multiply_32f.h index 26fc148c..27d86149 100644 --- a/kernels/volk/volk_32f_s32f_multiply_32f.h +++ b/kernels/volk/volk_32f_s32f_multiply_32f.h @@ -257,4 +257,21 @@ static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector, #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_multiply_32f_rvv(float* cVector, + const float* aVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + __riscv_vse32(cVector, __riscv_vfmul(v, scalar, vl), vl); + } +} 
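For comparison, a scalar reference of what this conversion kernel computes; the helper name is illustrative, and lrintf only stands in for the vector convert's round-to-nearest behaviour under the default rounding mode:

#include <math.h>
#include <stdint.h>

/* out[i] = (int32_t)round(in[i] * scalar), ties rounded to even by default */
static inline void
convert_32i_scalar_ref(int32_t* out, const float* in, float scalar, unsigned int n)
{
    for (unsigned int i = 0; i < n; i++)
        out[i] = (int32_t)lrintf(in[i] * scalar);
}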
+#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */ diff --git a/kernels/volk/volk_32f_s32f_normalize.h b/kernels/volk/volk_32f_s32f_normalize.h index 46f5799b..e572f24c 100644 --- a/kernels/volk/volk_32f_s32f_normalize.h +++ b/kernels/volk/volk_32f_s32f_normalize.h @@ -203,5 +203,19 @@ static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_s32f_normalize_rvv(float* vecBuffer, const float scalar, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, vecBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(vecBuffer, vl); + __riscv_vse32(vecBuffer, __riscv_vfmul(v, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_s32f_normalize_u_H */ diff --git a/kernels/volk/volk_32f_s32f_s32f_mod_range_32f.h b/kernels/volk/volk_32f_s32f_s32f_mod_range_32f.h index d185f102..f5176150 100644 --- a/kernels/volk/volk_32f_s32f_s32f_mod_range_32f.h +++ b/kernels/volk/volk_32f_s32f_s32f_mod_range_32f.h @@ -359,5 +359,37 @@ static inline void volk_32f_s32f_s32f_mod_range_32f_a_sse(float* outputVector, } #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_s32f_mod_range_32f_rvv(float* outputVector, + const float* inputVector, + const float lower_bound, + const float upper_bound, + unsigned int num_points) +{ + const float dist = upper_bound - lower_bound; + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t vdist = __riscv_vfmv_v_f_f32m4(dist, vlmax); + vfloat32m4_t vmdist = __riscv_vfmv_v_f_f32m4(-dist, vlmax); + vfloat32m4_t vupper = __riscv_vfmv_v_f_f32m4(upper_bound, vlmax); + vfloat32m4_t vlower = __riscv_vfmv_v_f_f32m4(lower_bound, vlmax); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, outputVector += vl, inputVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t v = __riscv_vle32_v_f32m4(inputVector, vl); + vfloat32m4_t vlt = __riscv_vfsub(vlower, v, vl); + vfloat32m4_t vgt = __riscv_vfsub(v, vupper, vl); + vbool8_t mlt = __riscv_vmflt(v, vlower, vl); + vfloat32m4_t vmul = __riscv_vmerge(vmdist, vdist, mlt, vl); + vfloat32m4_t vcnt = __riscv_vfdiv(__riscv_vmerge(vgt, vlt, mlt, vl), vdist, vl); + vcnt = __riscv_vfcvt_f(__riscv_vadd(__riscv_vfcvt_rtz_x(vcnt, vl), 1, vl), vl); + vbool8_t mgt = __riscv_vmfgt(v, vupper, vl); + v = __riscv_vfmacc_mu(__riscv_vmor(mlt, mgt, vl), v, vcnt, vmul, vl); + + __riscv_vse32(outputVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_VOLK_32F_S32F_S32F_MOD_RANGE_32F_A_H */ diff --git a/kernels/volk/volk_32f_s32f_stddev_32f.h b/kernels/volk/volk_32f_s32f_stddev_32f.h index 3b5bb6e1..8774277b 100644 --- a/kernels/volk/volk_32f_s32f_stddev_32f.h +++ b/kernels/volk/volk_32f_s32f_stddev_32f.h @@ -344,4 +344,32 @@ static inline void volk_32f_s32f_stddev_32f_u_avx(float* stddev, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32f_s32f_stddev_32f_rvv(float* stddev, + const float* inputBuffer, + const float mean, + unsigned int num_points) +{ + if (num_points == 0) { + *stddev = 0; + return; + } + vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputBuffer, vl); + vsum = __riscv_vfmacc_tu(vsum, v, v, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + 
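    /* Horizontal reduction: RISCV_SHRINK8 (from volk_rvv_intrinsics.h) is
     * expected to fold the LMUL=8 accumulator down to a single LMUL=1
     * register via pairwise vfadd; vfredusum then sums its lanes into
     * element 0, which vfmv_f reads back as a scalar. */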
vfloat32m1_t v = RISCV_SHRINK8(vfadd, f, 32, vsum); + v = __riscv_vfredusum(v, __riscv_vfmv_s_f_f32m1(0, vl), vl); + float sum = __riscv_vfmv_f(v); + *stddev = sqrtf((sum / num_points) - (mean * mean)); +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_s32f_stddev_32f_u_H */ diff --git a/kernels/volk/volk_32f_s32f_x2_clamp_32f.h b/kernels/volk/volk_32f_s32f_x2_clamp_32f.h index 19d51795..2b194eaa 100644 --- a/kernels/volk/volk_32f_s32f_x2_clamp_32f.h +++ b/kernels/volk/volk_32f_s32f_x2_clamp_32f.h @@ -187,4 +187,25 @@ static inline void volk_32f_s32f_x2_clamp_32f_u_sse4_1(float* out, } #endif /* LV_HAVE_SSE4_1 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_x2_clamp_32f_rvv(float* out, + const float* in, + const float min, + const float max, + unsigned int num_points) +{ + vfloat32m8_t vmin = __riscv_vfmv_v_f_f32m8(min, __riscv_vsetvlmax_e32m8()); + vfloat32m8_t vmax = __riscv_vfmv_v_f_f32m8(max, __riscv_vsetvlmax_e32m8()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(in, vl); + v = __riscv_vfmin(__riscv_vfmax(v, vmin, vl), vmax, vl); + __riscv_vse32(out, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_s32f_x2_clamp_32f_u_H */ diff --git a/kernels/volk/volk_32f_s32f_x2_convert_8u.h b/kernels/volk/volk_32f_s32f_x2_convert_8u.h index a52cdf28..1ad2b1ac 100644 --- a/kernels/volk/volk_32f_s32f_x2_convert_8u.h +++ b/kernels/volk/volk_32f_s32f_x2_convert_8u.h @@ -612,5 +612,24 @@ static inline void volk_32f_s32f_x2_convert_8u_a_sse(uint8_t* outputVector, #endif /* LV_HAVE_SSE */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_s32f_x2_convert_8u_rvv(uint8_t* outputVector, + const float* inputVector, + const float scale, + const float bias, + unsigned int num_points) +{ + vfloat32m8_t vb = __riscv_vfmv_v_f_f32m8(bias, __riscv_vsetvlmax_e32m8()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl); + vuint16m4_t vi = __riscv_vfncvt_xu(__riscv_vfmadd_vf_f32m8(v, scale, vb, vl), vl); + __riscv_vse8(outputVector, __riscv_vnclipu(vi, 0, 0, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_s32f_x2_convert_8u_a_H */ diff --git a/kernels/volk/volk_32f_sin_32f.h b/kernels/volk/volk_32f_sin_32f.h index 371e424f..a02f2260 100644 --- a/kernels/volk/volk_32f_sin_32f.h +++ b/kernels/volk/volk_32f_sin_32f.h @@ -127,8 +127,9 @@ static inline void volk_32f_sin_32f_a_avx512f(float* sinVector, cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm512_mul_ps(s, _mm512_sub_ps(ffours, s)); + } s = _mm512_div_ps(s, ftwos); sine = _mm512_sqrt_ps(_mm512_mul_ps(_mm512_sub_ps(ftwos, s), s)); @@ -520,8 +521,9 @@ static inline void volk_32f_sin_32f_u_avx512f(float* sinVector, cp1), s); - for (i = 0; i < 3; i++) + for (i = 0; i < 3; i++) { s = _mm512_mul_ps(s, _mm512_sub_ps(ffours, s)); + } s = _mm512_div_ps(s, ftwos); sine = _mm512_sqrt_ps(_mm512_mul_ps(_mm512_sub_ps(ftwos, s), s)); @@ -893,5 +895,67 @@ volk_32f_sin_32f_neon(float* bVector, const float* aVector, unsigned int num_poi #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_sin_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t c4oPi = __riscv_vfmv_v_f_f32m2(1.2732395f, vlmax); + const vfloat32m2_t 
cPio4a = __riscv_vfmv_v_f_f32m2(0.7853982f, vlmax); + const vfloat32m2_t cPio4b = __riscv_vfmv_v_f_f32m2(7.946627e-09f, vlmax); + const vfloat32m2_t cPio4c = __riscv_vfmv_v_f_f32m2(3.061617e-17f, vlmax); + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf4 = __riscv_vfmv_v_f_f32m2(4.0f, vlmax); + + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(0.0833333333f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(0.0027777778f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(4.9603175e-05, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(5.5114638e-07, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t s = __riscv_vfabs(v, vl); + vint32m2_t q = __riscv_vfcvt_x(__riscv_vfmul(s, c4oPi, vl), vl); + vfloat32m2_t r = __riscv_vfcvt_f(__riscv_vadd(q, __riscv_vand(q, 1, vl), vl), vl); + + s = __riscv_vfnmsac(s, cPio4a, r, vl); + s = __riscv_vfnmsac(s, cPio4b, r, vl); + s = __riscv_vfnmsac(s, cPio4c, r, vl); + + s = __riscv_vfmul(s, 1 / 8.0f, vl); + s = __riscv_vfmul(s, s, vl); + vfloat32m2_t t = s; + s = __riscv_vfmsub(s, c5, c4, vl); + s = __riscv_vfmadd(s, t, c3, vl); + s = __riscv_vfmsub(s, t, c2, vl); + s = __riscv_vfmadd(s, t, cf1, vl); + s = __riscv_vfmul(s, t, vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, 1 / 2.0f, vl); + + vfloat32m2_t sine = + __riscv_vfsqrt(__riscv_vfmul(__riscv_vfrsub(s, 2.0f, vl), s, vl), vl); + vfloat32m2_t cosine = __riscv_vfsub(cf1, s, vl); + + vbool16_t m1 = __riscv_vmsne(__riscv_vand(__riscv_vadd(q, 1, vl), 2, vl), 0, vl); + vbool16_t m2 = __riscv_vmxor(__riscv_vmslt(__riscv_vreinterpret_i32m2(v), 0, vl), + __riscv_vmsne(__riscv_vand(q, 4, vl), 0, vl), + vl); + + sine = __riscv_vmerge(sine, cosine, m1, vl); + sine = __riscv_vfneg_mu(m2, sine, sine, vl); + + __riscv_vse32(bVector, sine, vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_sin_32f_u_H */ diff --git a/kernels/volk/volk_32f_sqrt_32f.h b/kernels/volk/volk_32f_sqrt_32f.h index 9d269413..c5672534 100644 --- a/kernels/volk/volk_32f_sqrt_32f.h +++ b/kernels/volk/volk_32f_sqrt_32f.h @@ -205,4 +205,20 @@ volk_32f_sqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_p } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_sqrt_32f_rvv(float* cVector, const float* aVector, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl); + __riscv_vse32(cVector, __riscv_vfsqrt(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_sqrt_32f_u_H */ diff --git a/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h b/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h index c71514bb..96535ed6 100644 --- a/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h +++ b/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h @@ -569,4 +569,75 @@ static inline void volk_32f_stddev_and_mean_32f_x2_a_avx(float* stddev, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_stddev_and_mean_32f_x2_rvv(float* stddev, + float* mean, + const float* inputBuffer, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + if (num_points < 
vlmax) { + volk_32f_stddev_and_mean_32f_x2_generic(stddev, mean, inputBuffer, num_points); + return; + } + + vfloat32m4_t vsum = __riscv_vle32_v_f32m4(inputBuffer, vlmax); + inputBuffer += vlmax; + vfloat32m4_t vsumsq = __riscv_vfmv_v_f_f32m4(0, vlmax); + size_t partLen = num_points / vlmax; + + for (size_t i = 1; i < partLen; ++i, inputBuffer += vlmax) { + vfloat32m4_t v = __riscv_vle32_v_f32m4(inputBuffer, vlmax); + vsum = __riscv_vfadd(vsum, v, vlmax); + vfloat32m4_t vaux = __riscv_vfmsub(v, i + 1.0f, vsum, vlmax); + vaux = __riscv_vfmul(vaux, vaux, vlmax); + vaux = __riscv_vfmul(vaux, 1.0f / (i * (i + 1.0f)), vlmax); + vsumsq = __riscv_vfadd(vsumsq, vaux, vlmax); + } + + size_t vl = __riscv_vsetvlmax_e32m2(); + vfloat32m2_t vsum2 = + __riscv_vfadd(__riscv_vget_f32m2(vsum, 0), __riscv_vget_f32m2(vsum, 1), vl); + vfloat32m2_t vfix2 = + __riscv_vfsub(__riscv_vget_f32m2(vsum, 0), __riscv_vget_f32m2(vsum, 1), vl); + vfix2 = __riscv_vfmul(vfix2, vfix2, vl); + vfloat32m2_t vsumsq2 = + __riscv_vfadd(__riscv_vget_f32m2(vsumsq, 0), __riscv_vget_f32m2(vsumsq, 1), vl); + vsumsq2 = __riscv_vfmacc(vsumsq2, 0.5f / (num_points / vlmax), vfix2, vl); + + vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vsum1 = + __riscv_vfadd(__riscv_vget_f32m1(vsum2, 0), __riscv_vget_f32m1(vsum2, 1), vl); + vfloat32m1_t vfix1 = + __riscv_vfsub(__riscv_vget_f32m1(vsum2, 0), __riscv_vget_f32m1(vsum2, 1), vl); + vfix1 = __riscv_vfmul(vfix1, vfix1, vl); + vfloat32m1_t vsumsq1 = + __riscv_vfadd(__riscv_vget_f32m1(vsumsq2, 0), __riscv_vget_f32m1(vsumsq2, 1), vl); + vsumsq1 = __riscv_vfmacc(vsumsq1, 0.5f / (num_points / vlmax * 2), vfix1, vl); + + for (size_t n = num_points / vlmax * 4, vl = vlmax >> 2; vl >>= 1; n *= 2) { + vfloat32m1_t vsumdown = __riscv_vslidedown(vsum1, vl, vl); + vfix1 = __riscv_vfsub(vsum1, vsumdown, vl); + vfix1 = __riscv_vfmul(vfix1, vfix1, vl); + vsum1 = __riscv_vfadd(vsum1, vsumdown, vl); + vsumsq1 = __riscv_vfadd(vsumsq1, __riscv_vslidedown(vsumsq1, vl, vl), vl); + vsumsq1 = __riscv_vfmacc(vsumsq1, 0.5f / n, vfix1, vl); + } + + float sum = __riscv_vfmv_f(vsum1); + float sumsq = __riscv_vfmv_f(vsumsq1); + + for (size_t i = partLen * vlmax; i < num_points; ++i) { + float in = *inputBuffer++; + sum += in; + sumsq = update_square_sum_1_val(sumsq, sum, i, in); + } + + *stddev = sqrtf(sumsq / num_points); + *mean = sum / num_points; +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */ diff --git a/kernels/volk/volk_32f_tan_32f.h b/kernels/volk/volk_32f_tan_32f.h index 1ec0202f..28810c94 100644 --- a/kernels/volk/volk_32f_tan_32f.h +++ b/kernels/volk/volk_32f_tan_32f.h @@ -750,5 +750,72 @@ volk_32f_tan_32f_neon(float* bVector, const float* aVector, unsigned int num_poi } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_tan_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t c4oPi = __riscv_vfmv_v_f_f32m2(1.2732395f, vlmax); + const vfloat32m2_t cPio4a = __riscv_vfmv_v_f_f32m2(0.7853982f, vlmax); + const vfloat32m2_t cPio4b = __riscv_vfmv_v_f_f32m2(7.946627e-09f, vlmax); + const vfloat32m2_t cPio4c = __riscv_vfmv_v_f_f32m2(3.061617e-17f, vlmax); + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vfloat32m2_t cf4 = __riscv_vfmv_v_f_f32m2(4.0f, vlmax); + + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(0.0833333333f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(0.0027777778f, vlmax); + const vfloat32m2_t c4 = 
__riscv_vfmv_v_f_f32m2(4.9603175e-05f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(5.5114638e-07f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t v = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t s = __riscv_vfabs(v, vl); + vint32m2_t q = __riscv_vfcvt_x(__riscv_vfmul(s, c4oPi, vl), vl); + vfloat32m2_t r = __riscv_vfcvt_f(__riscv_vadd(q, __riscv_vand(q, 1, vl), vl), vl); + + s = __riscv_vfnmsac(s, cPio4a, r, vl); + s = __riscv_vfnmsac(s, cPio4b, r, vl); + s = __riscv_vfnmsac(s, cPio4c, r, vl); + + s = __riscv_vfmul(s, 1 / 8.0f, vl); + s = __riscv_vfmul(s, s, vl); + vfloat32m2_t t = s; + s = __riscv_vfmsub(s, c5, c4, vl); + s = __riscv_vfmadd(s, t, c3, vl); + s = __riscv_vfmsub(s, t, c2, vl); + s = __riscv_vfmadd(s, t, cf1, vl); + s = __riscv_vfmul(s, t, vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, __riscv_vfsub(cf4, s, vl), vl); + s = __riscv_vfmul(s, 1 / 2.0f, vl); + + vfloat32m2_t sine = + __riscv_vfsqrt(__riscv_vfmul(__riscv_vfrsub(s, 2.0f, vl), s, vl), vl); + vfloat32m2_t cosine = __riscv_vfsub(cf1, s, vl); + + vbool16_t m1 = __riscv_vmsne(__riscv_vand(__riscv_vadd(q, 1, vl), 2, vl), 0, vl); + vbool16_t m2 = __riscv_vmsne(__riscv_vand(__riscv_vadd(q, 2, vl), 4, vl), 0, vl); + vbool16_t m3 = __riscv_vmxor(__riscv_vmslt(__riscv_vreinterpret_i32m2(v), 0, vl), + __riscv_vmsne(__riscv_vand(q, 4, vl), 0, vl), + vl); + + vfloat32m2_t sine0 = sine; + sine = __riscv_vmerge(sine, cosine, m1, vl); + sine = __riscv_vfneg_mu(m3, sine, sine, vl); + + cosine = __riscv_vmerge(cosine, sine0, m1, vl); + cosine = __riscv_vfneg_mu(m2, cosine, cosine, vl); + + __riscv_vse32(bVector, __riscv_vfdiv(sine, cosine, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_tan_32f_u_H */ diff --git a/kernels/volk/volk_32f_tanh_32f.h b/kernels/volk/volk_32f_tanh_32f.h index 3e36adb7..e90e4025 100644 --- a/kernels/volk/volk_32f_tanh_32f.h +++ b/kernels/volk/volk_32f_tanh_32f.h @@ -412,4 +412,38 @@ volk_32f_tanh_32f_u_avx_fma(float* cVector, const float* aVector, unsigned int n } #endif /* LV_HAVE_AVX && LV_HAVE_FMA */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32f_tanh_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(135135.0f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(17325.0f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(378.0f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(62370.0f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3150.0f, vlmax); + const vfloat32m2_t c6 = __riscv_vfmv_v_f_f32m2(28.0f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2_t x = __riscv_vle32_v_f32m2(aVector, vl); + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + vfloat32m2_t a, b; + a = __riscv_vfadd(xx, c3, vl); + a = __riscv_vfmadd(a, xx, c2, vl); + a = __riscv_vfmadd(a, xx, c1, vl); + a = __riscv_vfmul(a, x, vl); + b = c6; + b = __riscv_vfmadd(b, xx, c5, vl); + b = __riscv_vfmadd(b, xx, c4, vl); + b = __riscv_vfmadd(b, xx, c1, vl); + __riscv_vse32(bVector, __riscv_vfdiv(a, b, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_tanh_32f_u_H */ diff --git a/kernels/volk/volk_32f_x2_add_32f.h 
b/kernels/volk/volk_32f_x2_add_32f.h index f99e6b55..be9f6aa7 100644 --- a/kernels/volk/volk_32f_x2_add_32f.h +++ b/kernels/volk/volk_32f_x2_add_32f.h @@ -391,5 +391,22 @@ static inline void volk_32f_x2_add_32f_u_orc(float* cVector, #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_add_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfadd(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_x2_add_32f_a_H */ diff --git a/kernels/volk/volk_32f_x2_divide_32f.h b/kernels/volk/volk_32f_x2_divide_32f.h index bcb9da7c..fbece7d5 100644 --- a/kernels/volk/volk_32f_x2_divide_32f.h +++ b/kernels/volk/volk_32f_x2_divide_32f.h @@ -347,4 +347,22 @@ static inline void volk_32f_x2_divide_32f_u_avx(float* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_divide_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfdiv(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_x2_divide_32f_u_H */ diff --git a/kernels/volk/volk_32f_x2_dot_prod_16i.h b/kernels/volk/volk_32f_x2_dot_prod_16i.h index 3a4b7177..3502b3a5 100644 --- a/kernels/volk/volk_32f_x2_dot_prod_16i.h +++ b/kernels/volk/volk_32f_x2_dot_prod_16i.h @@ -678,5 +678,20 @@ static inline void volk_32f_x2_dot_prod_16i_u_avx512f(int16_t* result, #endif /*LV_HAVE_AVX512F*/ +#ifdef LV_HAVE_RVV +#include + +#include "volk_32f_x2_dot_prod_32f.h" + +static inline void volk_32f_x2_dot_prod_16i_rvv(int16_t* result, + const float* input, + const float* taps, + unsigned int num_points) +{ + float fresult = 0; + volk_32f_x2_dot_prod_32f_rvv(&fresult, input, taps, num_points); + *result = (int16_t)rintf(fresult); +} +#endif /*LV_HAVE_RVV*/ #endif /*INCLUDED_volk_32f_x2_dot_prod_16i_H*/ diff --git a/kernels/volk/volk_32f_x2_dot_prod_32f.h b/kernels/volk/volk_32f_x2_dot_prod_32f.h index 5bdb72ce..2d86411e 100644 --- a/kernels/volk/volk_32f_x2_dot_prod_32f.h +++ b/kernels/volk/volk_32f_x2_dot_prod_32f.h @@ -949,4 +949,28 @@ extern void volk_32f_x2_dot_prod_32f_a_neonasm_opts(float* cVector, unsigned int num_points); #endif /* LV_HAVE_NEONV7 */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32f_x2_dot_prod_32f_rvv(float* result, + const float* input, + const float* taps, + unsigned int num_points) +{ + vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v0 = __riscv_vle32_v_f32m8(input, vl); + vfloat32m8_t v1 = __riscv_vle32_v_f32m8(taps, vl); + vsum = __riscv_vfmacc_tu(vsum, v0, v1, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t v = RISCV_SHRINK8(vfadd, f, 32, vsum); + v = __riscv_vfredusum(v, __riscv_vfmv_s_f_f32m1(0, vl), vl); + *result = __riscv_vfmv_f(v); +} +#endif /*LV_HAVE_RVV*/ 
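The element-wise kernels in this patch share the strip-mining loop used above; a minimal self-contained sketch of that pattern (the helper name is illustrative; non-overloaded intrinsic names and an RVV 1.0 toolchain are assumed):

#include <riscv_vector.h>
#include <stddef.h>

/* c[i] = a[i] + b[i]; vsetvl decides how many lanes each pass handles,
 * so no scalar tail loop is needed. */
static inline void
add_f32_rvv_sketch(float* c, const float* a, const float* b, size_t n)
{
    for (size_t vl; n > 0; n -= vl, a += vl, b += vl, c += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t va = __riscv_vle32_v_f32m8(a, vl);
        vfloat32m8_t vb = __riscv_vle32_v_f32m8(b, vl);
        __riscv_vse32_v_f32m8(c, __riscv_vfadd_vv_f32m8(va, vb, vl), vl);
    }
}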
+ #endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a_H*/ diff --git a/kernels/volk/volk_32f_x2_fm_detectpuppet_32f.h b/kernels/volk/volk_32f_x2_fm_detectpuppet_32f.h index b4901543..62e30ad8 100644 --- a/kernels/volk/volk_32f_x2_fm_detectpuppet_32f.h +++ b/kernels/volk/volk_32f_x2_fm_detectpuppet_32f.h @@ -79,4 +79,17 @@ static inline void volk_32f_x2_fm_detectpuppet_32f_u_avx(float* outputVector, outputVector, inputVector, bound, saveValue, num_points); } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +static inline void volk_32f_x2_fm_detectpuppet_32f_rvv(float* outputVector, + const float* inputVector, + float* saveValue, + unsigned int num_points) +{ + const float bound = 2.0f; + volk_32f_s32f_32f_fm_detect_32f_rvv( + outputVector, inputVector, bound, saveValue, num_points); +} +#endif /* LV_HAVE_RVV */ + #endif /* INCLUDED_volk_32f_x2_fm_detectpuppet_32f_u_H */ diff --git a/kernels/volk/volk_32f_x2_interleave_32fc.h b/kernels/volk/volk_32f_x2_interleave_32fc.h index 140fa9ff..2190f1a4 100644 --- a/kernels/volk/volk_32f_x2_interleave_32fc.h +++ b/kernels/volk/volk_32f_x2_interleave_32fc.h @@ -255,4 +255,43 @@ static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_interleave_32fc_rvv(lv_32fc_t* complexVector, + const float* iBuffer, + const float* qBuffer, + unsigned int num_points) +{ + uint64_t* out = (uint64_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint32m4_t vr = __riscv_vle32_v_u32m4((const uint32_t*)iBuffer, vl); + vuint32m4_t vi = __riscv_vle32_v_u32m4((const uint32_t*)qBuffer, vl); + vuint64m8_t vc = + __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFFFFFF, vi, vl); + __riscv_vse64(out, vc, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32f_x2_interleave_32fc_rvvseg(lv_32fc_t* complexVector, + const float* iBuffer, + const float* qBuffer, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t vr = __riscv_vle32_v_f32m4(iBuffer, vl); + vfloat32m4_t vi = __riscv_vle32_v_f32m4(qBuffer, vl); + __riscv_vsseg2e32((float*)complexVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */ diff --git a/kernels/volk/volk_32f_x2_max_32f.h b/kernels/volk/volk_32f_x2_max_32f.h index 0f88ffe6..a0d48f75 100644 --- a/kernels/volk/volk_32f_x2_max_32f.h +++ b/kernels/volk/volk_32f_x2_max_32f.h @@ -330,4 +330,22 @@ static inline void volk_32f_x2_max_32f_u_avx(float* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_max_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfmax(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_x2_max_32f_u_H */ diff --git a/kernels/volk/volk_32f_x2_min_32f.h b/kernels/volk/volk_32f_x2_min_32f.h index 128c7483..2910b1f9 100644 --- a/kernels/volk/volk_32f_x2_min_32f.h +++ 
b/kernels/volk/volk_32f_x2_min_32f.h @@ -334,4 +334,22 @@ static inline void volk_32f_x2_min_32f_u_avx(float* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_min_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfmin(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_x2_min_32f_u_H */ diff --git a/kernels/volk/volk_32f_x2_multiply_32f.h b/kernels/volk/volk_32f_x2_multiply_32f.h index c36adfc2..af266041 100644 --- a/kernels/volk/volk_32f_x2_multiply_32f.h +++ b/kernels/volk/volk_32f_x2_multiply_32f.h @@ -356,5 +356,22 @@ static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_multiply_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfmul(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */ diff --git a/kernels/volk/volk_32f_x2_pow_32f.h b/kernels/volk/volk_32f_x2_pow_32f.h index 637fd4b7..c2b77233 100644 --- a/kernels/volk/volk_32f_x2_pow_32f.h +++ b/kernels/volk/volk_32f_x2_pow_32f.h @@ -976,4 +976,127 @@ static inline void volk_32f_x2_pow_32f_u_avx2(float* cVector, #endif /* LV_HAVE_AVX2 for unaligned */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_pow_32f_rvv(float* cVector, + const float* bVector, + const float* aVector, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m1(); + +#if POW_POLY_DEGREE == 6 + const vfloat32m1_t cl5 = __riscv_vfmv_v_f_f32m1(3.1157899f, vlmax); + const vfloat32m1_t cl4 = __riscv_vfmv_v_f_f32m1(-3.3241990f, vlmax); + const vfloat32m1_t cl3 = __riscv_vfmv_v_f_f32m1(2.5988452f, vlmax); + const vfloat32m1_t cl2 = __riscv_vfmv_v_f_f32m1(-1.2315303f, vlmax); + const vfloat32m1_t cl1 = __riscv_vfmv_v_f_f32m1(3.1821337e-1f, vlmax); + const vfloat32m1_t cl0 = __riscv_vfmv_v_f_f32m1(-3.4436006e-2f, vlmax); +#elif POW_POLY_DEGREE == 5 + const vfloat32m1_t cl4 = __riscv_vfmv_v_f_f32m1(2.8882704548164776201f, vlmax); + const vfloat32m1_t cl3 = __riscv_vfmv_v_f_f32m1(-2.52074962577807006663f, vlmax); + const vfloat32m1_t cl2 = __riscv_vfmv_v_f_f32m1(1.48116647521213171641f, vlmax); + const vfloat32m1_t cl1 = __riscv_vfmv_v_f_f32m1(-0.465725644288844778798f, vlmax); + const vfloat32m1_t cl0 = __riscv_vfmv_v_f_f32m1(0.0596515482674574969533f, vlmax); +#elif POW_POLY_DEGREE == 4 + const vfloat32m1_t cl3 = __riscv_vfmv_v_f_f32m1(2.61761038894603480148f, vlmax); + const vfloat32m1_t cl2 = __riscv_vfmv_v_f_f32m1(-1.75647175389045657003f, vlmax); + const vfloat32m1_t cl1 = __riscv_vfmv_v_f_f32m1(0.688243882994381274313f, vlmax); + const vfloat32m1_t cl0 = __riscv_vfmv_v_f_f32m1(-0.107254423828329604454f, vlmax); +#elif POW_POLY_DEGREE == 3 + const vfloat32m1_t cl2 = __riscv_vfmv_v_f_f32m1(2.28330284476918490682f, vlmax); + const vfloat32m1_t cl1 = 
__riscv_vfmv_v_f_f32m1(-1.04913055217340124191f, vlmax); + const vfloat32m1_t cl0 = __riscv_vfmv_v_f_f32m1(0.204446009836232697516f, vlmax); +#else +#error +#endif + + const vfloat32m1_t exp_hi = __riscv_vfmv_v_f_f32m1(88.376259f, vlmax); + const vfloat32m1_t exp_lo = __riscv_vfmv_v_f_f32m1(-88.376259f, vlmax); + const vfloat32m1_t log2EF = __riscv_vfmv_v_f_f32m1(1.442695f, vlmax); + const vfloat32m1_t exp_C1 = __riscv_vfmv_v_f_f32m1(-0.6933594f, vlmax); + const vfloat32m1_t exp_C2 = __riscv_vfmv_v_f_f32m1(0.000212194f, vlmax); + const vfloat32m1_t cf1 = __riscv_vfmv_v_f_f32m1(1.0f, vlmax); + const vfloat32m1_t cf1o2 = __riscv_vfmv_v_f_f32m1(0.5f, vlmax); + const vfloat32m1_t ln2 = __riscv_vfmv_v_f_f32m1(0.6931471805f, vlmax); + + const vfloat32m1_t ce0 = __riscv_vfmv_v_f_f32m1(1.9875691500e-4, vlmax); + const vfloat32m1_t ce1 = __riscv_vfmv_v_f_f32m1(1.3981999507e-3, vlmax); + const vfloat32m1_t ce2 = __riscv_vfmv_v_f_f32m1(8.3334519073e-3, vlmax); + const vfloat32m1_t ce3 = __riscv_vfmv_v_f_f32m1(4.1665795894e-2, vlmax); + const vfloat32m1_t ce4 = __riscv_vfmv_v_f_f32m1(1.6666665459e-1, vlmax); + const vfloat32m1_t ce5 = __riscv_vfmv_v_f_f32m1(5.0000001201e-1, vlmax); + + const vint32m1_t m1 = __riscv_vreinterpret_i32m1(cf1); + const vint32m1_t m2 = __riscv_vmv_v_x_i32m1(0x7FFFFF, vlmax); + const vint32m1_t c127 = __riscv_vmv_v_x_i32m1(127, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t va = __riscv_vle32_v_f32m1(aVector, vl); + vfloat32m1_t log; + + { /* log(a) */ + vfloat32m1_t a = __riscv_vfabs(va, vl); + vfloat32m1_t exp = __riscv_vfcvt_f( + __riscv_vsub( + __riscv_vsra(__riscv_vreinterpret_i32m1(a), 23, vl), c127, vl), + vl); + vfloat32m1_t frac = __riscv_vreinterpret_f32m1(__riscv_vor( + __riscv_vand(__riscv_vreinterpret_i32m1(va), m2, vl), m1, vl)); + + vfloat32m1_t mant = cl0; + mant = __riscv_vfmadd(mant, frac, cl1, vl); + mant = __riscv_vfmadd(mant, frac, cl2, vl); +#if POW_POLY_DEGREE >= 4 + mant = __riscv_vfmadd(mant, frac, cl3, vl); +#if POW_POLY_DEGREE >= 5 + mant = __riscv_vfmadd(mant, frac, cl4, vl); +#if POW_POLY_DEGREE >= 6 + mant = __riscv_vfmadd(mant, frac, cl5, vl); +#endif +#endif +#endif + log = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl); + log = __riscv_vfmul(log, ln2, vl); + } + + vfloat32m1_t vb = __riscv_vle32_v_f32m1(bVector, vl); + vb = __riscv_vfmul(vb, log, vl); /* b*log(a) */ + vfloat32m1_t exp; + + { /* exp(b*log(a)) */ + vb = __riscv_vfmin(vb, exp_hi, vl); + vb = __riscv_vfmax(vb, exp_lo, vl); + vfloat32m1_t fx = __riscv_vfmadd(vb, log2EF, cf1o2, vl); + + vfloat32m1_t rtz = __riscv_vfcvt_f(__riscv_vfcvt_rtz_x(fx, vl), vl); + fx = __riscv_vfsub_mu(__riscv_vmfgt(rtz, fx, vl), rtz, rtz, cf1, vl); + vb = __riscv_vfmacc(vb, exp_C1, fx, vl); + vb = __riscv_vfmacc(vb, exp_C2, fx, vl); + vfloat32m1_t vv = __riscv_vfmul(vb, vb, vl); + + vfloat32m1_t y = ce0; + y = __riscv_vfmadd(y, vb, ce1, vl); + y = __riscv_vfmadd(y, vb, ce2, vl); + y = __riscv_vfmadd(y, vb, ce3, vl); + y = __riscv_vfmadd(y, vb, ce4, vl); + y = __riscv_vfmadd(y, vb, ce5, vl); + y = __riscv_vfmadd(y, vv, vb, vl); + y = __riscv_vfadd(y, cf1, vl); + + vfloat32m1_t pow2n = __riscv_vreinterpret_f32m1(__riscv_vsll( + __riscv_vadd(__riscv_vfcvt_rtz_x(fx, vl), c127, vl), 23, vl)); + + exp = __riscv_vfmul(y, pow2n, vl); + } + + __riscv_vse32(cVector, exp, vl); + } +} + +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_x2_log2_32f_u_H */ diff --git 
a/kernels/volk/volk_32f_x2_powpuppet_32f.h b/kernels/volk/volk_32f_x2_powpuppet_32f.h index 419ee18e..d4df0b3d 100644 --- a/kernels/volk/volk_32f_x2_powpuppet_32f.h +++ b/kernels/volk/volk_32f_x2_powpuppet_32f.h @@ -111,4 +111,16 @@ static inline void volk_32f_x2_powpuppet_32f_u_avx2(float* cVector, } #endif /* LV_HAVE_AVX2 for unaligned */ +#ifdef LV_HAVE_RVV +static inline void volk_32f_x2_powpuppet_32f_rvv(float* cVector, + const float* bVector, + const float* aVector, + unsigned int num_points) +{ + float* aVectorPos = make_positive(aVector, num_points); + volk_32f_x2_pow_32f_rvv(cVector, bVector, aVectorPos, num_points); + volk_free(aVectorPos); +} +#endif /* LV_HAVE_RVV */ + #endif /* INCLUDED_volk_32f_x2_powpuppet_32f_H */ diff --git a/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h b/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h index 2ddfb0fd..9a78a01a 100644 --- a/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h +++ b/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h @@ -326,5 +326,51 @@ static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVec } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_s32f_interleave_16ic_rvv(lv_16sc_t* complexVector, + const float* iBuffer, + const float* qBuffer, + const float scalar, + unsigned int num_points) +{ + uint32_t* out = (uint32_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t vrf = __riscv_vle32_v_f32m8(iBuffer, vl); + vfloat32m8_t vif = __riscv_vle32_v_f32m8(qBuffer, vl); + vint16m4_t vri = __riscv_vfncvt_x(__riscv_vfmul(vrf, scalar, vl), vl); + vint16m4_t vii = __riscv_vfncvt_x(__riscv_vfmul(vif, scalar, vl), vl); + vuint16m4_t vr = __riscv_vreinterpret_u16m4(vri); + vuint16m4_t vi = __riscv_vreinterpret_u16m4(vii); + vuint32m8_t vc = __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFF, vi, vl); + __riscv_vse32(out, vc, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32f_x2_s32f_interleave_16ic_rvvseg(lv_16sc_t* complexVector, + const float* iBuffer, + const float* qBuffer, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t vrf = __riscv_vle32_v_f32m8(iBuffer, vl); + vfloat32m8_t vif = __riscv_vle32_v_f32m8(qBuffer, vl); + vint16m4_t vri = __riscv_vfncvt_x(__riscv_vfmul(vrf, scalar, vl), vl); + vint16m4_t vii = __riscv_vfncvt_x(__riscv_vfmul(vif, scalar, vl), vl); + __riscv_vsseg2e16( + (int16_t*)complexVector, __riscv_vcreate_v_i16m4x2(vri, vii), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */ diff --git a/kernels/volk/volk_32f_x2_subtract_32f.h b/kernels/volk/volk_32f_x2_subtract_32f.h index 631b72f8..e3d563fc 100644 --- a/kernels/volk/volk_32f_x2_subtract_32f.h +++ b/kernels/volk/volk_32f_x2_subtract_32f.h @@ -272,4 +272,22 @@ static inline void volk_32f_x2_subtract_32f_u_avx(float* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32f_x2_subtract_32f_rvv(float* cVector, + const float* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl); + vfloat32m8_t vb = 
__riscv_vle32_v_f32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vfsub(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32f_x2_subtract_32f_u_H */ diff --git a/kernels/volk/volk_32f_x3_sum_of_poly_32f.h b/kernels/volk/volk_32f_x3_sum_of_poly_32f.h index 6afd262a..b9a83714 100644 --- a/kernels/volk/volk_32f_x3_sum_of_poly_32f.h +++ b/kernels/volk/volk_32f_x3_sum_of_poly_32f.h @@ -341,8 +341,9 @@ static inline void volk_32f_x3_sum_of_poly_32f_generic(float* target, result[k] += center_point_array[2] * thrd + center_point_array[3] * frth; } } - for (k = 0; k < 8; k += 2) + for (k = 0; k < 8; k += 2) { result[k] = result[k] + result[k + 1]; + } *target = result[0] + result[2] + result[4] + result[6]; @@ -654,4 +655,45 @@ static inline void volk_32f_x3_sum_of_poly_32f_u_avx(float* target, } #endif // LV_HAVE_AVX +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32f_x3_sum_of_poly_32f_rvv(float* target, + float* src0, + float* center_point_array, + float* cutoff, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t vsum = __riscv_vfmv_v_f_f32m4(0, vlmax); + float mul1 = center_point_array[0]; // scalar to avoid register spills + float mul2 = center_point_array[1]; + vfloat32m4_t vmul3 = __riscv_vfmv_v_f_f32m4(center_point_array[2], vlmax); + vfloat32m4_t vmul4 = __riscv_vfmv_v_f_f32m4(center_point_array[3], vlmax); + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(*cutoff, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4_t v = __riscv_vle32_v_f32m4(src0, vl); + vfloat32m4_t v1 = __riscv_vfmax(v, vmax, vl); + vfloat32m4_t v2 = __riscv_vfmul(v1, v1, vl); + vfloat32m4_t v3 = __riscv_vfmul(v1, v2, vl); + vfloat32m4_t v4 = __riscv_vfmul(v2, v2, vl); + v2 = __riscv_vfmul(v2, mul2, vl); + v4 = __riscv_vfmul(v4, vmul4, vl); + v1 = __riscv_vfmadd(v1, mul1, v2, vl); + v3 = __riscv_vfmadd(v3, vmul3, v4, vl); + v1 = __riscv_vfadd(v1, v3, vl); + vsum = __riscv_vfadd_tu(vsum, vsum, v1, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t v = RISCV_SHRINK4(vfadd, f, 32, vsum); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + float sum = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl)); + *target = sum + num_points * center_point_array[4]; +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_u_H*/ diff --git a/kernels/volk/volk_32fc_32f_add_32fc.h b/kernels/volk/volk_32fc_32f_add_32fc.h index b820ed5d..24eff2b4 100644 --- a/kernels/volk/volk_32fc_32f_add_32fc.h +++ b/kernels/volk/volk_32fc_32f_add_32fc.h @@ -230,5 +230,24 @@ static inline void volk_32fc_32f_add_32fc_neon(lv_32fc_t* cVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_32f_add_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, cVector += vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m8_t vc = __riscv_vle32_v_f32m8((const float*)aVector, vl * 2); + vuint32m4_t v = __riscv_vle32_v_u32m4((const uint32_t*)bVector, vl); + vfloat32m8_t vf = __riscv_vreinterpret_f32m8( + __riscv_vreinterpret_u32m8(__riscv_vzext_vf2_u64m8(v, vl))); + __riscv_vse32((float*)cVector, __riscv_vfadd(vc, vf, vl * 2), vl * 2); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32fc_32f_add_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_32f_dot_prod_32fc.h 
b/kernels/volk/volk_32fc_32f_dot_prod_32fc.h index 363bf657..472d405a 100644 --- a/kernels/volk/volk_32fc_32f_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_32f_dot_prod_32fc.h @@ -743,5 +743,63 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_sse(lv_32fc_t* result, #endif /*LV_HAVE_SSE*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_32f_dot_prod_32fc_rvv(lv_32fc_t* result, + const lv_32fc_t* input, + const float* taps, + unsigned int num_points) +{ + vfloat32m4_t vsumr = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t va = __riscv_vle64_v_u64m8((const uint64_t*)input, vl); + vfloat32m4_t vbr = __riscv_vle32_v_f32m4(taps, vl), vbi = vbr; + vfloat32m4_t var = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 0, vl)); + vfloat32m4_t vai = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 32, vl)); + vsumr = __riscv_vfmacc_tu(vsumr, var, vbr, vl); + vsumi = __riscv_vfmacc_tu(vsumi, vai, vbi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_32fc_32f_dot_prod_32fc_rvvseg(lv_32fc_t* result, + const lv_32fc_t* input, + const float* taps, + unsigned int num_points) +{ + vfloat32m4_t vsumr = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)input, vl); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vbr = __riscv_vle32_v_f32m4(taps, vl), vbi = vbr; + vsumr = __riscv_vfmacc_tu(vsumr, var, vbr, vl); + vsumi = __riscv_vfmacc_tu(vsumi, vai, vbi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVVSEG*/ #endif /*INCLUDED_volk_32fc_32f_dot_prod_32fc_H*/ diff --git a/kernels/volk/volk_32fc_32f_multiply_32fc.h b/kernels/volk/volk_32fc_32f_multiply_32fc.h index 76ed1af7..b731414c 100644 --- a/kernels/volk/volk_32fc_32f_multiply_32fc.h +++ b/kernels/volk/volk_32fc_32f_multiply_32fc.h @@ -224,5 +224,24 @@ static inline void volk_32fc_32f_multiply_32fc_u_orc(lv_32fc_t* cVector, #endif /* LV_HAVE_GENERIC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_32f_multiply_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const float* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, cVector += vl, aVector += vl, bVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m8_t vc = __riscv_vle32_v_f32m8((const float*)aVector, vl * 2); + vuint32m4_t v = __riscv_vle32_v_u32m4((const uint32_t*)bVector, vl); + vfloat32m8_t vf = __riscv_vreinterpret_f32m8(__riscv_vreinterpret_u32m8( + __riscv_vwmaccu(__riscv_vwaddu_vv(v, v, vl), 0xFFFFFFFF, v, 
vl))); + __riscv_vse32((float*)cVector, __riscv_vfmul(vc, vf, vl * 2), vl * 2); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_accumulator_s32fc.h b/kernels/volk/volk_32fc_accumulator_s32fc.h index d7267ea6..72266bd5 100644 --- a/kernels/volk/volk_32fc_accumulator_s32fc.h +++ b/kernels/volk/volk_32fc_accumulator_s32fc.h @@ -276,4 +276,33 @@ static inline void volk_32fc_accumulator_s32fc_neon(lv_32fc_t* result, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_accumulator_s32fc_rvv(lv_32fc_t* result, + const lv_32fc_t* inputBuffer, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, vlmax); + const float* in = (const float*)inputBuffer; + size_t n = num_points * 2; + for (size_t vl; n > 0; n -= vl, in += vl) { + vl = __riscv_vsetvl_e32m8(n < vlmax ? n : vlmax); /* force exact vl */ + vfloat32m8_t v = __riscv_vle32_v_f32m8(in, vl); + vsum = __riscv_vfadd_tu(vsum, vsum, v, vl); + } + vuint64m8_t vsumu = __riscv_vreinterpret_u64m8(__riscv_vreinterpret_u32m8(vsum)); + vfloat32m4_t vsum1 = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vsumu, 0, vlmax)); + vfloat32m4_t vsum2 = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vsumu, 32, vlmax)); + vlmax = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsum1); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsum2); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vlmax); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vlmax)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vlmax))); +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_accumulator_s32fc_a_H */ diff --git a/kernels/volk/volk_32fc_conjugate_32fc.h b/kernels/volk/volk_32fc_conjugate_32fc.h index aa1134ab..2edff119 100644 --- a/kernels/volk/volk_32fc_conjugate_32fc.h +++ b/kernels/volk/volk_32fc_conjugate_32fc.h @@ -260,4 +260,21 @@ static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector, #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_conjugate_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + unsigned int num_points) +{ + size_t n = num_points; + vuint64m8_t m = __riscv_vmv_v_x_u64m8(1ull << 63, __riscv_vsetvlmax_e64m8()); + for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint64m8_t v = __riscv_vle64_v_u64m8((const uint64_t*)aVector, vl); + __riscv_vse64((uint64_t*)cVector, __riscv_vxor(v, m, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_convert_16ic.h b/kernels/volk/volk_32fc_convert_16ic.h index a38cce64..55768ab0 100644 --- a/kernels/volk/volk_32fc_convert_16ic.h +++ b/kernels/volk/volk_32fc_convert_16ic.h @@ -416,4 +416,23 @@ static inline void volk_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, } } #endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_convert_16ic_rvv(lv_16sc_t* outputVector, + const lv_32fc_t* inputVector, + unsigned int num_points) +{ + int16_t* out = (int16_t*)outputVector; + float* in = (float*)inputVector; + size_t n = num_points * 2; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vle32_v_f32m8(in, vl); + __riscv_vse16(out, __riscv_vfncvt_x(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_convert_16ic_u_H */ diff --git 
a/kernels/volk/volk_32fc_deinterleave_32f_x2.h b/kernels/volk/volk_32fc_deinterleave_32f_x2.h index f269d661..569942fe 100644 --- a/kernels/volk/volk_32fc_deinterleave_32f_x2.h +++ b/kernels/volk/volk_32fc_deinterleave_32f_x2.h @@ -254,4 +254,46 @@ static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer, } } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_deinterleave_32f_x2_rvv(float* iBuffer, + float* qBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl); + vuint32m4_t vr = __riscv_vnsrl(vc, 0, vl); + vuint32m4_t vi = __riscv_vnsrl(vc, 32, vl); + __riscv_vse32((uint32_t*)iBuffer, vr, vl); + __riscv_vse32((uint32_t*)qBuffer, vi, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_deinterleave_32f_x2_rvvseg(float* iBuffer, + float* qBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint32m4x2_t vc = + __riscv_vlseg2e32_v_u32m4x2((const uint32_t*)complexVector, vl); + vuint32m4_t vr = __riscv_vget_u32m4(vc, 0); + vuint32m4_t vi = __riscv_vget_u32m4(vc, 1); + __riscv_vse32((uint32_t*)iBuffer, vr, vl); + __riscv_vse32((uint32_t*)qBuffer, vi, vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */ diff --git a/kernels/volk/volk_32fc_deinterleave_64f_x2.h b/kernels/volk/volk_32fc_deinterleave_64f_x2.h index 1af5098f..6599780b 100644 --- a/kernels/volk/volk_32fc_deinterleave_64f_x2.h +++ b/kernels/volk/volk_32fc_deinterleave_64f_x2.h @@ -314,4 +314,44 @@ static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer, } #endif /* LV_HAVE_NEONV8 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_deinterleave_64f_x2_rvv(double* iBuffer, + double* qBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + __riscv_vse64(iBuffer, __riscv_vfwcvt_f(vr, vl), vl); + __riscv_vse64(qBuffer, __riscv_vfwcvt_f(vi, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_deinterleave_64f_x2_rvvseg(double* iBuffer, + double* qBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)complexVector, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0); + vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1); + __riscv_vse64(iBuffer, __riscv_vfwcvt_f(vr, vl), vl); + __riscv_vse64(qBuffer, __riscv_vfwcvt_f(vi, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */ diff --git a/kernels/volk/volk_32fc_deinterleave_imag_32f.h 
b/kernels/volk/volk_32fc_deinterleave_imag_32f.h index 9e330d33..bb54411b 100644 --- a/kernels/volk/volk_32fc_deinterleave_imag_32f.h +++ b/kernels/volk/volk_32fc_deinterleave_imag_32f.h @@ -229,4 +229,22 @@ static inline void volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer, } } #endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_deinterleave_imag_32f_rvv(float* qBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + const uint64_t* in = (const uint64_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8(in, vl); + __riscv_vse32((uint32_t*)qBuffer, __riscv_vnsrl(vc, 32, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */ diff --git a/kernels/volk/volk_32fc_deinterleave_real_32f.h b/kernels/volk/volk_32fc_deinterleave_real_32f.h index 6fc0679d..f75cdd03 100644 --- a/kernels/volk/volk_32fc_deinterleave_real_32f.h +++ b/kernels/volk/volk_32fc_deinterleave_real_32f.h @@ -234,4 +234,21 @@ static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_deinterleave_real_32f_rvv(float* iBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + const uint64_t* in = (const uint64_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8(in, vl); + __riscv_vse32((uint32_t*)iBuffer, __riscv_vnsrl(vc, 0, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */ diff --git a/kernels/volk/volk_32fc_deinterleave_real_64f.h b/kernels/volk/volk_32fc_deinterleave_real_64f.h index 31d8f3ec..5c6b0c95 100644 --- a/kernels/volk/volk_32fc_deinterleave_real_64f.h +++ b/kernels/volk/volk_32fc_deinterleave_real_64f.h @@ -240,4 +240,21 @@ static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_deinterleave_real_64f_rvv(double* iBuffer, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + const uint64_t* in = (const uint64_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint32m4_t vi = __riscv_vnsrl(__riscv_vle64_v_u64m8(in, vl), 0, vl); + __riscv_vse64(iBuffer, __riscv_vfwcvt_f(__riscv_vreinterpret_f32m4(vi), vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */ diff --git a/kernels/volk/volk_32fc_index_max_16u.h b/kernels/volk/volk_32fc_index_max_16u.h index 28b51766..781876d1 100644 --- a/kernels/volk/volk_32fc_index_max_16u.h +++ b/kernels/volk/volk_32fc_index_max_16u.h @@ -321,7 +321,7 @@ volk_32fc_index_max_16u_generic(uint16_t* target, lv_32fc_t* src0, uint32_t num_ uint32_t i = 0; - for (; i> 3; ++i) { + for (; i < (num_bytes >> 3); ++i) { sq_dist = lv_creal(src0[i]) * lv_creal(src0[i]) + lv_cimag(src0[i]) * lv_cimag(src0[i]); @@ -466,4 +466,65 @@ static inline void volk_32fc_index_max_16u_u_avx2_variant_1(uint16_t* target, #endif /*LV_HAVE_AVX2*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32fc_index_max_16u_rvv(uint16_t* target, lv_32fc_t* src0, uint32_t num_points) +{ + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(0, 
__riscv_vsetvlmax_e32m4()); + vuint16m2_t vmaxi = __riscv_vmv_v_x_u16m2(0, __riscv_vsetvlmax_e16m2()); + vuint16m2_t vidx = __riscv_vid_v_u16m2(__riscv_vsetvlmax_e16m2()); + size_t n = (num_points > USHRT_MAX) ? USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)src0, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmflt(vmax, v, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK4(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(0, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void +volk_32fc_index_max_16u_rvvseg(uint16_t* target, lv_32fc_t* src0, uint32_t num_points) +{ + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vuint16m2_t vmaxi = __riscv_vmv_v_x_u16m2(0, __riscv_vsetvlmax_e16m2()); + vuint16m2_t vidx = __riscv_vid_v_u16m2(__riscv_vsetvlmax_e16m2()); + size_t n = (num_points > USHRT_MAX) ? USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)src0, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0), vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmflt(vmax, v, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK4(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(0, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_index_max_16u_u_H*/ diff --git a/kernels/volk/volk_32fc_index_max_32u.h b/kernels/volk/volk_32fc_index_max_32u.h index fafff48c..993187ca 100644 --- a/kernels/volk/volk_32fc_index_max_32u.h +++ b/kernels/volk/volk_32fc_index_max_32u.h @@ -307,7 +307,7 @@ volk_32fc_index_max_32u_generic(uint32_t* target, lv_32fc_t* src0, uint32_t num_ uint32_t i = 0; - for (; i> 3; ++i) { + for (; i < (num_bytes >> 3); ++i) { sq_dist = lv_creal(src0[i]) * lv_creal(src0[i]) + lv_cimag(src0[i]) * lv_cimag(src0[i]); @@ -509,4 +509,65 @@ volk_32fc_index_max_32u_neon(uint32_t* target, lv_32fc_t* src0, uint32_t num_poi #endif /*LV_HAVE_NEON*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void +volk_32fc_index_max_32u_rvv(uint32_t* target, lv_32fc_t* src0, uint32_t num_points) +{ + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmaxi = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { 
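+        /* Per-lane argmax: each pass computes |z|^2 = re*re + im*im, keeps a running
+         * per-lane maximum in vmax, and, under the mask m of lanes that saw a new
+         * maximum, merges the current element indices from vidx into vmaxi. vidx is
+         * advanced by vl every pass so it always holds global element indices; after
+         * the loop the per-lane maxima are reduced to a scalar and the first lane
+         * equal to that maximum supplies the reported index. */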
+ vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)src0, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmflt(vmax, v, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK4(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(0, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void +volk_32fc_index_max_32u_rvvseg(uint32_t* target, lv_32fc_t* src0, uint32_t num_points) +{ + vfloat32m4_t vmax = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmaxi = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, src0 += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)src0, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0), vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmflt(vmax, v, vl); + vmax = __riscv_vfmax_tu(vmax, vmax, v, vl); + vmaxi = __riscv_vmerge_tu(vmaxi, vmaxi, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float max = __riscv_vfmv_f(__riscv_vfredmax(RISCV_SHRINK4(vfmax, f, 32, vmax), + __riscv_vfmv_v_f_f32m1(0, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmax, max, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmaxi, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_index_max_32u_u_H*/ diff --git a/kernels/volk/volk_32fc_index_min_16u.h b/kernels/volk/volk_32fc_index_min_16u.h index 6cf6d844..706db915 100644 --- a/kernels/volk/volk_32fc_index_min_16u.h +++ b/kernels/volk/volk_32fc_index_min_16u.h @@ -462,4 +462,67 @@ static inline void volk_32fc_index_min_16u_u_avx2_variant_1(uint16_t* target, #endif /*LV_HAVE_AVX2*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_index_min_16u_rvv(uint16_t* target, + const lv_32fc_t* source, + uint32_t num_points) +{ + vfloat32m4_t vmin = __riscv_vfmv_v_f_f32m4(FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint16m2_t vmini = __riscv_vmv_v_x_u16m2(0, __riscv_vsetvlmax_e16m2()); + vuint16m2_t vidx = __riscv_vid_v_u16m2(__riscv_vsetvlmax_e16m2()); + size_t n = (num_points > USHRT_MAX) ? 
USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, source += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)source, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmfgt(vmin, v, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = __riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK4(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_32fc_index_min_16u_rvvseg(uint16_t* target, + const lv_32fc_t* source, + uint32_t num_points) +{ + vfloat32m4_t vmin = __riscv_vfmv_v_f_f32m4(FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint16m2_t vmini = __riscv_vmv_v_x_u16m2(0, __riscv_vsetvlmax_e16m2()); + vuint16m2_t vidx = __riscv_vid_v_u16m2(__riscv_vsetvlmax_e16m2()); + size_t n = (num_points > USHRT_MAX) ? USHRT_MAX : num_points; + for (size_t vl; n > 0; n -= vl, source += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)source, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0), vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmfgt(vmin, v, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = __riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e16m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK4(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_index_min_16u_u_H*/ diff --git a/kernels/volk/volk_32fc_index_min_32u.h b/kernels/volk/volk_32fc_index_min_32u.h index 5e409b99..807a3bb5 100644 --- a/kernels/volk/volk_32fc_index_min_32u.h +++ b/kernels/volk/volk_32fc_index_min_32u.h @@ -504,4 +504,67 @@ static inline void volk_32fc_index_min_32u_neon(uint32_t* target, #endif /*LV_HAVE_NEON*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_index_min_32u_rvv(uint32_t* target, + const lv_32fc_t* source, + uint32_t num_points) +{ + vfloat32m4_t vmin = __riscv_vfmv_v_f_f32m4(FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmini = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, source += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)source, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmfgt(vmin, v, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = 
__riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK4(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_32fc_index_min_32u_rvvseg(uint32_t* target, + const lv_32fc_t* source, + uint32_t num_points) +{ + vfloat32m4_t vmin = __riscv_vfmv_v_f_f32m4(FLT_MAX, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vmini = __riscv_vmv_v_x_u32m4(0, __riscv_vsetvlmax_e32m4()); + vuint32m4_t vidx = __riscv_vid_v_u32m4(__riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, source += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)source, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0), vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vr, vr, vl), vi, vi, vl); + vbool8_t m = __riscv_vmfgt(vmin, v, vl); + vmin = __riscv_vfmin_tu(vmin, vmin, v, vl); + vmini = __riscv_vmerge_tu(vmini, vmini, vidx, m, vl); + vidx = __riscv_vadd(vidx, vl, __riscv_vsetvlmax_e32m4()); + } + size_t vl = __riscv_vsetvlmax_e32m4(); + float min = __riscv_vfmv_f(__riscv_vfredmin(RISCV_SHRINK4(vfmin, f, 32, vmin), + __riscv_vfmv_v_f_f32m1(FLT_MAX, 1), + __riscv_vsetvlmax_e32m1())); + vbool8_t m = __riscv_vmfeq(vmin, min, vl); + *target = __riscv_vmv_x(__riscv_vslidedown(vmini, __riscv_vfirst(m, vl), vl)); +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_index_min_32u_u_H*/ diff --git a/kernels/volk/volk_32fc_magnitude_32f.h b/kernels/volk/volk_32fc_magnitude_32f.h index eca00e24..7b4e44a5 100644 --- a/kernels/volk/volk_32fc_magnitude_32f.h +++ b/kernels/volk/volk_32fc_magnitude_32f.h @@ -420,5 +420,42 @@ static inline void volk_32fc_magnitude_32f_neon_fancy_sweet( } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_magnitude_32f_rvv(float* magnitudeVector, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(magnitudeVector, __riscv_vfsqrt(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_magnitude_32f_rvvseg(float* magnitudeVector, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)complexVector, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0); + vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(magnitudeVector, __riscv_vfsqrt(v, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git 
a/kernels/volk/volk_32fc_magnitude_squared_32f.h b/kernels/volk/volk_32fc_magnitude_squared_32f.h index e7b11ae9..24fa3a9a 100644 --- a/kernels/volk/volk_32fc_magnitude_squared_32f.h +++ b/kernels/volk/volk_32fc_magnitude_squared_32f.h @@ -350,5 +350,42 @@ static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_magnitude_squared_32f_rvv(float* magnitudeVector, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(magnitudeVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_magnitude_squared_32f_rvvseg(float* magnitudeVector, + const lv_32fc_t* complexVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)complexVector, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0); + vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(magnitudeVector, v, vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */ diff --git a/kernels/volk/volk_32fc_s32f_atan2_32f.h b/kernels/volk/volk_32fc_s32f_atan2_32f.h index 759db24c..7d98b7c2 100644 --- a/kernels/volk/volk_32fc_s32f_atan2_32f.h +++ b/kernels/volk/volk_32fc_s32f_atan2_32f.h @@ -344,4 +344,113 @@ static inline void volk_32fc_s32f_atan2_32f_u_avx2(float* outputVector, } #endif /* LV_HAVE_AVX2 for unaligned */ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_s32f_atan2_32f_rvv(float* outputVector, + const lv_32fc_t* inputVector, + const float normalizeFactor, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t norm = __riscv_vfmv_v_f_f32m2(1 / normalizeFactor, vlmax); + const vfloat32m2_t cpi = __riscv_vfmv_v_f_f32m2(3.1415927f, vlmax); + const vfloat32m2_t cpio2 = __riscv_vfmv_v_f_f32m2(1.5707964f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(+0x1.ffffeap-1f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-0x1.55437p-2f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(+0x1.972be6p-3f, vlmax); + const vfloat32m2_t c7 = __riscv_vfmv_v_f_f32m2(-0x1.1436ap-3f, vlmax); + const vfloat32m2_t c9 = __riscv_vfmv_v_f_f32m2(+0x1.5785aap-4f, vlmax); + const vfloat32m2_t c11 = __riscv_vfmv_v_f_f32m2(-0x1.2f3004p-5f, vlmax); + const vfloat32m2_t c13 = __riscv_vfmv_v_f_f32m2(+0x1.01a37cp-7f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vuint64m4_t v = __riscv_vle64_v_u64m4((const uint64_t*)inputVector, vl); + vfloat32m2_t vr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(v, 0, vl)); + vfloat32m2_t vi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(v, 32, vl)); + vbool16_t mswap = __riscv_vmfgt(__riscv_vfabs(vi, vl), __riscv_vfabs(vr, vl), vl); + vfloat32m2_t x = 
__riscv_vfdiv( + __riscv_vmerge(vi, vr, mswap, vl), __riscv_vmerge(vr, vi, mswap, vl), vl); + vbool16_t mnan = __riscv_vmsgtu(__riscv_vfclass(x, vl), 0xFF, vl); + x = __riscv_vreinterpret_f32m2( + __riscv_vmerge(__riscv_vreinterpret_u32m2(x), 0, mnan, vl)); + + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + vfloat32m2_t p = c13; + p = __riscv_vfmadd(p, xx, c11, vl); + p = __riscv_vfmadd(p, xx, c9, vl); + p = __riscv_vfmadd(p, xx, c7, vl); + p = __riscv_vfmadd(p, xx, c5, vl); + p = __riscv_vfmadd(p, xx, c3, vl); + p = __riscv_vfmadd(p, xx, c1, vl); + p = __riscv_vfmul(p, x, vl); + + x = __riscv_vfsub(__riscv_vfsgnj(cpio2, x, vl), p, vl); + p = __riscv_vmerge(p, x, mswap, vl); + p = __riscv_vfadd_mu( + RISCV_VMFLTZ(32m2, vr, vl), p, p, __riscv_vfsgnjx(cpi, vi, vl), vl); + + __riscv_vse32(outputVector, __riscv_vfmul(p, norm, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_32fc_s32f_atan2_32f_rvvseg(float* outputVector, + const lv_32fc_t* inputVector, + const float normalizeFactor, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + + const vfloat32m2_t norm = __riscv_vfmv_v_f_f32m2(1 / normalizeFactor, vlmax); + const vfloat32m2_t cpi = __riscv_vfmv_v_f_f32m2(3.1415927f, vlmax); + const vfloat32m2_t cpio2 = __riscv_vfmv_v_f_f32m2(1.5707964f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(+0x1.ffffeap-1f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-0x1.55437p-2f, vlmax); + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(+0x1.972be6p-3f, vlmax); + const vfloat32m2_t c7 = __riscv_vfmv_v_f_f32m2(-0x1.1436ap-3f, vlmax); + const vfloat32m2_t c9 = __riscv_vfmv_v_f_f32m2(+0x1.5785aap-4f, vlmax); + const vfloat32m2_t c11 = __riscv_vfmv_v_f_f32m2(-0x1.2f3004p-5f, vlmax); + const vfloat32m2_t c13 = __riscv_vfmv_v_f_f32m2(+0x1.01a37cp-7f, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2x2_t v = __riscv_vlseg2e32_v_f32m2x2((const float*)inputVector, vl); + vfloat32m2_t vr = __riscv_vget_f32m2(v, 0), vi = __riscv_vget_f32m2(v, 1); + vbool16_t mswap = __riscv_vmfgt(__riscv_vfabs(vi, vl), __riscv_vfabs(vr, vl), vl); + vfloat32m2_t x = __riscv_vfdiv( + __riscv_vmerge(vi, vr, mswap, vl), __riscv_vmerge(vr, vi, mswap, vl), vl); + vbool16_t mnan = __riscv_vmsgtu(__riscv_vfclass(x, vl), 0xFF, vl); + x = __riscv_vreinterpret_f32m2( + __riscv_vmerge(__riscv_vreinterpret_u32m2(x), 0, mnan, vl)); + + vfloat32m2_t xx = __riscv_vfmul(x, x, vl); + vfloat32m2_t p = c13; + p = __riscv_vfmadd(p, xx, c11, vl); + p = __riscv_vfmadd(p, xx, c9, vl); + p = __riscv_vfmadd(p, xx, c7, vl); + p = __riscv_vfmadd(p, xx, c5, vl); + p = __riscv_vfmadd(p, xx, c3, vl); + p = __riscv_vfmadd(p, xx, c1, vl); + p = __riscv_vfmul(p, x, vl); + + x = __riscv_vfsub(__riscv_vfsgnj(cpio2, x, vl), p, vl); + p = __riscv_vmerge(p, x, mswap, vl); + p = __riscv_vfadd_mu( + RISCV_VMFLTZ(32m2, vr, vl), p, p, __riscv_vfsgnjx(cpi, vi, vl), vl); + + __riscv_vse32(outputVector, __riscv_vfmul(p, norm, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_u_H */ diff --git a/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h b/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h index c4bfc28e..51840e3b 100644 --- a/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h +++ b/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h @@ -253,4 +253,24 @@ volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer, #endif 
/* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32fc_s32f_deinterleave_real_16i_rvv(int16_t* iBuffer, + const lv_32fc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + const uint64_t* in = (const uint64_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint32m4_t vi = __riscv_vnsrl(__riscv_vle64_v_u64m8(in, vl), 0, vl); + vfloat32m4_t vif = __riscv_vfmul(__riscv_vreinterpret_f32m4(vi), scalar, vl); + __riscv_vse16(iBuffer, __riscv_vncvt_x(__riscv_vfcvt_x(vif, vl), vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */ diff --git a/kernels/volk/volk_32fc_s32f_magnitude_16i.h b/kernels/volk/volk_32fc_s32f_magnitude_16i.h index 21e12e2d..f699ed72 100644 --- a/kernels/volk/volk_32fc_s32f_magnitude_16i.h +++ b/kernels/volk/volk_32fc_s32f_magnitude_16i.h @@ -302,4 +302,46 @@ static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_s32f_magnitude_16i_rvv(int16_t* magnitudeVector, + const lv_32fc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl); + vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl)); + vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl)); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + v = __riscv_vfmul(__riscv_vfsqrt(v, vl), scalar, vl); + __riscv_vse16(magnitudeVector, __riscv_vfncvt_x(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_s32f_magnitude_16i_rvvseg(int16_t* magnitudeVector, + const lv_32fc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexVector += vl, magnitudeVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((const float*)complexVector, vl); + vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0); + vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + v = __riscv_vfmul(__riscv_vfsqrt(v, vl), scalar, vl); + __riscv_vse16(magnitudeVector, __riscv_vfncvt_x(v, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */ diff --git a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h index be9aa88a..f676758e 100644 --- a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h +++ b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h @@ -142,4 +142,167 @@ volk_32fc_s32f_power_spectrum_32f_neon(float* logPowerOutput, #endif /* LV_HAVE_NEON */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_s32f_power_spectrum_32f_rvv(float* logPowerOutput, + const lv_32fc_t* complexFFTInput, + const float normalizationFactor, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + +#if LOG_POLY_DEGREE == 6 + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax); + const vfloat32m2_t c2 
= __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax); +#elif LOG_POLY_DEGREE == 5 + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax); +#elif LOG_POLY_DEGREE == 4 + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax); +#elif LOG_POLY_DEGREE == 3 + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax); +#else +#error +#endif + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1); + const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax); + const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax); + + const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) { + vl = __riscv_vsetvl_e32m2(n); + vuint64m4_t vc = __riscv_vle64_v_u64m4((const uint64_t*)complexFFTInput, vl); + vfloat32m2_t vr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 0, vl)); + vfloat32m2_t vi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 32, vl)); + vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + v = __riscv_vfmul(v, normFactSq, vl); + + vfloat32m2_t a = __riscv_vfabs(v, vl); + vfloat32m2_t exp = __riscv_vfcvt_f( + __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl), + vl); + vfloat32m2_t frac = __riscv_vreinterpret_f32m2( + __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl)); + + vfloat32m2_t mant = c0; + mant = __riscv_vfmadd(mant, frac, c1, vl); + mant = __riscv_vfmadd(mant, frac, c2, vl); +#if LOG_POLY_DEGREE >= 4 + mant = __riscv_vfmadd(mant, frac, c3, vl); +#if LOG_POLY_DEGREE >= 5 + mant = __riscv_vfmadd(mant, frac, c4, vl); +#if LOG_POLY_DEGREE >= 6 + mant = __riscv_vfmadd(mant, frac, c5, vl); +#endif +#endif +#endif + v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl); + v = __riscv_vfmul(v, volk_log2to10factor, vl); + + __riscv_vse32(logPowerOutput, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void +volk_32fc_s32f_power_spectrum_32f_rvvseg(float* logPowerOutput, + const lv_32fc_t* complexFFTInput, + const float normalizationFactor, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + +#if LOG_POLY_DEGREE == 6 + const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax); + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax); + const vfloat32m2_t c1 = 
__riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax); +#elif LOG_POLY_DEGREE == 5 + const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax); + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax); +#elif LOG_POLY_DEGREE == 4 + const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax); + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax); +#elif LOG_POLY_DEGREE == 3 + const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax); + const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax); + const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax); +#else +#error +#endif + + const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax); + const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1); + const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax); + const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax); + + const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2x2_t vc = + __riscv_vlseg2e32_v_f32m2x2((const float*)complexFFTInput, vl); + vfloat32m2_t vr = __riscv_vget_f32m2(vc, 0); + vfloat32m2_t vi = __riscv_vget_f32m2(vc, 1); + vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + v = __riscv_vfmul(v, normFactSq, vl); + + vfloat32m2_t a = __riscv_vfabs(v, vl); + vfloat32m2_t exp = __riscv_vfcvt_f( + __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl), + vl); + vfloat32m2_t frac = __riscv_vreinterpret_f32m2( + __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl)); + + vfloat32m2_t mant = c0; + mant = __riscv_vfmadd(mant, frac, c1, vl); + mant = __riscv_vfmadd(mant, frac, c2, vl); +#if LOG_POLY_DEGREE >= 4 + mant = __riscv_vfmadd(mant, frac, c3, vl); +#if LOG_POLY_DEGREE >= 5 + mant = __riscv_vfmadd(mant, frac, c4, vl); +#if LOG_POLY_DEGREE >= 6 + mant = __riscv_vfmadd(mant, frac, c5, vl); +#endif +#endif +#endif + v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl); + v = __riscv_vfmul(v, volk_log2to10factor, vl); + + __riscv_vse32(logPowerOutput, v, vl); + } +} + +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */ diff --git a/kernels/volk/volk_32fc_s32fc_rotator2puppet_32fc.h b/kernels/volk/volk_32fc_s32fc_rotator2puppet_32fc.h index 3ce071ca..1ae8ad92 100644 --- a/kernels/volk/volk_32fc_s32fc_rotator2puppet_32fc.h +++ b/kernels/volk/volk_32fc_s32fc_rotator2puppet_32fc.h @@ -170,4 +170,34 @@ volk_32fc_s32fc_rotator2puppet_32fc_u_avx_fma(lv_32fc_t* outVector, #endif /* LV_HAVE_AVX && LV_HAVE_FMA*/ +#ifdef LV_HAVE_RVV +static inline void volk_32fc_s32fc_rotator2puppet_32fc_rvv(lv_32fc_t* outVector, + const lv_32fc_t* inVector, + const lv_32fc_t* phase_inc, + unsigned int num_points) +{ + lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) }; + (*phase) /= 
hypotf(lv_creal(*phase), lv_cimag(*phase)); + const lv_32fc_t phase_inc_n = + *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc)); + volk_32fc_s32fc_x2_rotator2_32fc_rvv( + outVector, inVector, &phase_inc_n, phase, num_points); +} +#endif /*LV_HAVE_RVV*/ + + +#ifdef LV_HAVE_RVVSEG +static inline void volk_32fc_s32fc_rotator2puppet_32fc_rvvseg(lv_32fc_t* outVector, + const lv_32fc_t* inVector, + const lv_32fc_t* phase_inc, + unsigned int num_points) +{ + lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) }; + (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase)); + const lv_32fc_t phase_inc_n = + *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc)); + volk_32fc_s32fc_x2_rotator2_32fc_rvv( + outVector, inVector, &phase_inc_n, phase, num_points); +} +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32fc_s32fc_rotator2puppet_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_s32fc_x2_rotator2_32fc.h b/kernels/volk/volk_32fc_s32fc_x2_rotator2_32fc.h index bee1f068..e668e3c5 100644 --- a/kernels/volk/volk_32fc_s32fc_x2_rotator2_32fc.h +++ b/kernels/volk/volk_32fc_s32fc_x2_rotator2_32fc.h @@ -779,4 +779,158 @@ static inline void volk_32fc_s32fc_x2_rotator2_32fc_u_avx_fma(lv_32fc_t* outVect #endif /* LV_HAVE_AVX && LV_HAVE_FMA*/ +/* Note on the RVV implementation: + * The complex multiply was expanded, because we don't care about the corner cases. + * Otherwise, without -ffast-math, the compiler would inserts function calls, + * which invalidates all vector registers and spills them on each loop iteration. */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_s32fc_x2_rotator2_32fc_rvv(lv_32fc_t* outVector, + const lv_32fc_t* inVector, + const lv_32fc_t* phase_inc, + lv_32fc_t* phase, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + vlmax = vlmax < ROTATOR_RELOAD ? vlmax : ROTATOR_RELOAD; + + lv_32fc_t inc = 1.0f; + vfloat32m2_t phr = __riscv_vfmv_v_f_f32m2(0, vlmax), phi = phr; + for (size_t i = 0; i < vlmax; ++i) { + lv_32fc_t ph = + lv_cmake(lv_creal(*phase) * lv_creal(inc) - lv_cimag(*phase) * lv_cimag(inc), + lv_creal(*phase) * lv_cimag(inc) + lv_cimag(*phase) * lv_creal(inc)); + phr = __riscv_vfslide1down(phr, lv_creal(ph), vlmax); + phi = __riscv_vfslide1down(phi, lv_cimag(ph), vlmax); + inc = lv_cmake( + lv_creal(*phase_inc) * lv_creal(inc) - lv_cimag(*phase_inc) * lv_cimag(inc), + lv_creal(*phase_inc) * lv_cimag(inc) + lv_cimag(*phase_inc) * lv_creal(inc)); + } + vfloat32m2_t incr = __riscv_vfmv_v_f_f32m2(lv_creal(inc), vlmax); + vfloat32m2_t inci = __riscv_vfmv_v_f_f32m2(lv_cimag(inc), vlmax); + + size_t vl = 0; + if (num_points > 0) + while (1) { + size_t n = num_points < ROTATOR_RELOAD ? num_points : ROTATOR_RELOAD; + num_points -= n; + + for (; n > 0; n -= vl, inVector += vl, outVector += vl) { + // vl + +static inline void volk_32fc_s32fc_x2_rotator2_32fc_rvvseg(lv_32fc_t* outVector, + const lv_32fc_t* inVector, + const lv_32fc_t* phase_inc, + lv_32fc_t* phase, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m2(); + vlmax = vlmax < ROTATOR_RELOAD ? 
vlmax : ROTATOR_RELOAD; + + lv_32fc_t inc = 1.0f; + vfloat32m2_t phr = __riscv_vfmv_v_f_f32m2(0, vlmax), phi = phr; + for (size_t i = 0; i < vlmax; ++i) { + lv_32fc_t ph = + lv_cmake(lv_creal(*phase) * lv_creal(inc) - lv_cimag(*phase) * lv_cimag(inc), + lv_creal(*phase) * lv_cimag(inc) + lv_cimag(*phase) * lv_creal(inc)); + phr = __riscv_vfslide1down(phr, lv_creal(ph), vlmax); + phi = __riscv_vfslide1down(phi, lv_cimag(ph), vlmax); + inc = lv_cmake( + lv_creal(*phase_inc) * lv_creal(inc) - lv_cimag(*phase_inc) * lv_cimag(inc), + lv_creal(*phase_inc) * lv_cimag(inc) + lv_cimag(*phase_inc) * lv_creal(inc)); + } + vfloat32m2_t incr = __riscv_vfmv_v_f_f32m2(lv_creal(inc), vlmax); + vfloat32m2_t inci = __riscv_vfmv_v_f_f32m2(lv_cimag(inc), vlmax); + + size_t vl = 0; + if (num_points > 0) + while (1) { + size_t n = num_points < ROTATOR_RELOAD ? num_points : ROTATOR_RELOAD; + num_points -= n; + + for (; n > 0; n -= vl, inVector += vl, outVector += vl) { + // vl + +static inline void volk_32fc_x2_add_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + const float* ina = (const float*)aVector; + const float* inb = (const float*)bVector; + float* out = (float*)cVector; + size_t n = num_points * 2; + for (size_t vl; n > 0; n -= vl, ina += vl, inb += vl, out += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t va = __riscv_vle32_v_f32m8(ina, vl); + vfloat32m8_t vb = __riscv_vle32_v_f32m8(inb, vl); + __riscv_vse32(out, __riscv_vfadd(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32fc_x2_add_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h b/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h index 7b9aae3a..a5a4a9df 100644 --- a/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h @@ -421,5 +421,72 @@ static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse3(lv_32fc_t* result #endif /*LV_HAVE_SSE3*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_rvv(lv_32fc_t* result, + const lv_32fc_t* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m2_t vsumr = __riscv_vfmv_v_f_f32m2(0, __riscv_vsetvlmax_e32m2()); + vfloat32m2_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m2(n); + vuint64m4_t va = __riscv_vle64_v_u64m4((const uint64_t*)input, vl); + vuint64m4_t vb = __riscv_vle64_v_u64m4((const uint64_t*)taps, vl); + vfloat32m2_t var = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 0, vl)); + vfloat32m2_t vbr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vb, 0, vl)); + vfloat32m2_t vai = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 32, vl)); + vfloat32m2_t vbi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vb, 32, vl)); + vbi = __riscv_vfneg(vbi, vl); + vfloat32m2_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m2_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vfadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vfadd_tu(vsumi, vsumi, vi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK2(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK2(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include 
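+/* The rvvseg variant below lets vlseg2e32 deinterleave the complex samples into
+ * separate real/imaginary registers instead of the vle64/vnsrl split used by the
+ * plain RVV kernel above; RISCV_SHRINK2 (presumably from the volk_rvv_intrinsics.h
+ * helpers) folds the LMUL=2 accumulators to LMUL=1 before the final vfredusum. */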
+#include + +static inline void volk_32fc_x2_conjugate_dot_prod_32fc_rvvseg(lv_32fc_t* result, + const lv_32fc_t* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m2_t vsumr = __riscv_vfmv_v_f_f32m2(0, __riscv_vsetvlmax_e32m2()); + vfloat32m2_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m2(n); + vfloat32m2x2_t va = __riscv_vlseg2e32_v_f32m2x2((const float*)input, vl); + vfloat32m2x2_t vb = __riscv_vlseg2e32_v_f32m2x2((const float*)taps, vl); + vfloat32m2_t var = __riscv_vget_f32m2(va, 0), vai = __riscv_vget_f32m2(va, 1); + vfloat32m2_t vbr = __riscv_vget_f32m2(vb, 0), vbi = __riscv_vget_f32m2(vb, 1); + vbi = __riscv_vfneg(vbi, vl); + vfloat32m2_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m2_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vfadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vfadd_tu(vsumi, vsumi, vi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK2(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK2(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVVSEG*/ #endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H*/ diff --git a/kernels/volk/volk_32fc_x2_divide_32fc.h b/kernels/volk/volk_32fc_x2_divide_32fc.h index 3a013cb0..ceee6559 100644 --- a/kernels/volk/volk_32fc_x2_divide_32fc.h +++ b/kernels/volk/volk_32fc_x2_divide_32fc.h @@ -414,5 +414,66 @@ static inline void volk_32fc_x2_divide_32fc_neon(lv_32fc_t* cVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + + +static inline void volk_32fc_x2_divide_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + uint64_t* out = (uint64_t*)cVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, out += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t va = __riscv_vle64_v_u64m8((const uint64_t*)aVector, vl); + vuint64m8_t vb = __riscv_vle64_v_u64m8((const uint64_t*)bVector, vl); + vfloat32m4_t var = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 0, vl)); + vfloat32m4_t vbr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 0, vl)); + vfloat32m4_t vai = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 32, vl)); + vfloat32m4_t vbi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 32, vl)); + vfloat32m4_t mul = __riscv_vfrdiv( + __riscv_vfmacc(__riscv_vfmul(vbi, vbi, vl), vbr, vbr, vl), 1.0f, vl); + vfloat32m4_t vr = __riscv_vfmul( + __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl), mul, vl); + vfloat32m4_t vi = __riscv_vfmul( + __riscv_vfnmsac(__riscv_vfmul(vai, vbr, vl), var, vbi, vl), mul, vl); + vuint32m4_t vru = __riscv_vreinterpret_u32m4(vr); + vuint32m4_t viu = __riscv_vreinterpret_u32m4(vi); + vuint64m8_t v = + __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFFFFFF, viu, vl); + __riscv_vse64(out, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_x2_divide_32fc_rvvseg(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)aVector, vl); + 
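+        // vlseg2e32 deinterleaves on load: tuple field 0 holds the real parts,
+        // field 1 the imaginary parts of the complex inputs.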
vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)bVector, vl); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0), vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t mul = __riscv_vfrdiv( + __riscv_vfmacc(__riscv_vfmul(vbi, vbi, vl), vbr, vbr, vl), 1.0f, vl); + vfloat32m4_t vr = __riscv_vfmul( + __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl), mul, vl); + vfloat32m4_t vi = __riscv_vfmul( + __riscv_vfnmsac(__riscv_vfmul(vai, vbr, vl), var, vbi, vl), mul, vl); + __riscv_vsseg2e32_v_f32m4x2( + (float*)cVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl); + } +} + +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32fc_x2_divide_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_x2_dot_prod_32fc.h b/kernels/volk/volk_32fc_x2_dot_prod_32fc.h index 47d6f697..d4acab3a 100644 --- a/kernels/volk/volk_32fc_x2_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_x2_dot_prod_32fc.h @@ -730,5 +730,70 @@ static inline void volk_32fc_x2_dot_prod_32fc_a_avx_fma(lv_32fc_t* result, #endif /*LV_HAVE_AVX && LV_HAVE_FMA*/ +#ifdef LV_HAVE_RVV +#include +#include + +static inline void volk_32fc_x2_dot_prod_32fc_rvv(lv_32fc_t* result, + const lv_32fc_t* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m2_t vsumr = __riscv_vfmv_v_f_f32m2(0, __riscv_vsetvlmax_e32m2()); + vfloat32m2_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m2(n); + vuint64m4_t va = __riscv_vle64_v_u64m4((const uint64_t*)input, vl); + vuint64m4_t vb = __riscv_vle64_v_u64m4((const uint64_t*)taps, vl); + vfloat32m2_t var = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 0, vl)); + vfloat32m2_t vbr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vb, 0, vl)); + vfloat32m2_t vai = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 32, vl)); + vfloat32m2_t vbi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vb, 32, vl)); + vfloat32m2_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m2_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vfadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vfadd_tu(vsumi, vsumi, vi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK2(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK2(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include +#include + +static inline void volk_32fc_x2_dot_prod_32fc_rvvseg(lv_32fc_t* result, + const lv_32fc_t* input, + const lv_32fc_t* taps, + unsigned int num_points) +{ + vfloat32m4_t vsumr = __riscv_vfmv_v_f_f32m4(0, __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vsumi = vsumr; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, input += vl, taps += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)input, vl); + vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)taps, vl); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0), vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vsumr = __riscv_vfadd_tu(vsumr, vsumr, vr, vl); + vsumi = __riscv_vfadd_tu(vsumi, vsumi, 
vi, vl); + } + size_t vl = __riscv_vsetvlmax_e32m1(); + vfloat32m1_t vr = RISCV_SHRINK4(vfadd, f, 32, vsumr); + vfloat32m1_t vi = RISCV_SHRINK4(vfadd, f, 32, vsumi); + vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl); + *result = lv_cmake(__riscv_vfmv_f(__riscv_vfredusum(vr, z, vl)), + __riscv_vfmv_f(__riscv_vfredusum(vi, z, vl))); +} +#endif /*LV_HAVE_RVVSEG*/ #endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H*/ diff --git a/kernels/volk/volk_32fc_x2_multiply_32fc.h b/kernels/volk/volk_32fc_x2_multiply_32fc.h index 96cefed5..2db2929b 100644 --- a/kernels/volk/volk_32fc_x2_multiply_32fc.h +++ b/kernels/volk/volk_32fc_x2_multiply_32fc.h @@ -460,4 +460,55 @@ static inline void volk_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_x2_multiply_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t va = __riscv_vle64_v_u64m8((const uint64_t*)aVector, vl); + vuint64m8_t vb = __riscv_vle64_v_u64m8((const uint64_t*)bVector, vl); + vfloat32m4_t var = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 0, vl)); + vfloat32m4_t vbr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 0, vl)); + vfloat32m4_t vai = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 32, vl)); + vfloat32m4_t vbi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 32, vl)); + vfloat32m4_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vuint32m4_t vru = __riscv_vreinterpret_u32m4(vr); + vuint32m4_t viu = __riscv_vreinterpret_u32m4(vi); + vuint64m8_t v = + __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFFFFFF, viu, vl); + __riscv_vse64((uint64_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_x2_multiply_32fc_rvvseg(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)aVector, vl); + vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)bVector, vl); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0), vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t vr = __riscv_vfnmsac(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfmacc(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + __riscv_vsseg2e32_v_f32m4x2( + (float*)cVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h b/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h index 12e4948a..ce01d6d6 100644 --- a/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h +++ b/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h @@ -287,5 +287,56 @@ static inline void volk_32fc_x2_multiply_conjugate_32fc_neon(lv_32fc_t* cVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_x2_multiply_conjugate_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + 
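+    // Each complex pair is loaded as one 64-bit element and split into real/imag
+    // halves with narrowing shifts; a * conj(b) is then computed as
+    //   re = ar*br + ai*bi,  im = ai*br - ar*bi
+    // and re-interleaved through the widening add / multiply-accumulate below.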
size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t va = __riscv_vle64_v_u64m8((const uint64_t*)aVector, vl); + vuint64m8_t vb = __riscv_vle64_v_u64m8((const uint64_t*)bVector, vl); + vfloat32m4_t var = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 0, vl)); + vfloat32m4_t vbr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 0, vl)); + vfloat32m4_t vai = __riscv_vreinterpret_f32m4(__riscv_vnsrl(va, 32, vl)); + vfloat32m4_t vbi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 32, vl)); + vfloat32m4_t vr = __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfnmsac(__riscv_vfmul(vai, vbr, vl), var, vbi, vl); + vuint32m4_t vru = __riscv_vreinterpret_u32m4(vr); + vuint32m4_t viu = __riscv_vreinterpret_u32m4(vi); + vuint64m8_t v = + __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFFFFFF, viu, vl); + __riscv_vse64((uint64_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_x2_multiply_conjugate_32fc_rvvseg(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)aVector, vl); + vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)bVector, vl); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0), vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t vr = __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfnmsac(__riscv_vfmul(vai, vbr, vl), var, vbi, vl); + __riscv_vsseg2e32_v_f32m4x2( + (float*)cVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl); + } +} + +#endif /*LV_HAVE_RVVSEG*/ #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */ diff --git a/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h b/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h index 54ffbf0f..0b956c20 100644 --- a/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h +++ b/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h @@ -535,4 +535,62 @@ volk_32fc_x2_s32f_square_dist_scalar_mult_32f_u_sse(float* target, } #endif // LV_HAVE_SSE +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32fc_x2_s32f_square_dist_scalar_mult_32f_rvv(float* target, + lv_32fc_t* src0, + lv_32fc_t* points, + float scalar, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t var = __riscv_vfmv_v_f_f32m4(lv_creal(*src0), vlmax); + vfloat32m4_t vai = __riscv_vfmv_v_f_f32m4(lv_cimag(*src0), vlmax); + vfloat32m4_t vscale = __riscv_vfmv_v_f_f32m4(scalar, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, target += vl, points += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vb = __riscv_vle64_v_u64m8((const uint64_t*)points, vl); + vfloat32m4_t vbr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 0, vl)); + vfloat32m4_t vbi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 32, vl)); + vfloat32m4_t vr = __riscv_vfsub(var, vbr, vl); + vfloat32m4_t vi = __riscv_vfsub(vai, vbi, vl); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(target, __riscv_vfmul(v, vscale, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void 
+volk_32fc_x2_s32f_square_dist_scalar_mult_32f_rvvseg(float* target, + lv_32fc_t* src0, + lv_32fc_t* points, + float scalar, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t var = __riscv_vfmv_v_f_f32m4(lv_creal(*src0), vlmax); + vfloat32m4_t vai = __riscv_vfmv_v_f_f32m4(lv_cimag(*src0), vlmax); + vfloat32m4_t vscale = __riscv_vfmv_v_f_f32m4(scalar, vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, target += vl, points += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)points, vl); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0); + vfloat32m4_t vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t vr = __riscv_vfsub(var, vbr, vl); + vfloat32m4_t vi = __riscv_vfsub(vai, vbi, vl); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(target, __riscv_vfmul(v, vscale, vl), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_u_H*/ diff --git a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc.h b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc.h index b35bed5e..b27f7b7b 100644 --- a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc.h +++ b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc.h @@ -342,4 +342,69 @@ volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc_neon(lv_32fc_t* cVector, } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc_rvv(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + const lv_32fc_t* scalar, + unsigned int num_points) +{ + vfloat32m2_t vbr = + __riscv_vfmv_v_f_f32m2(lv_creal(*scalar), __riscv_vsetvlmax_e32m2()); + vfloat32m2_t vbi = + __riscv_vfmv_v_f_f32m2(lv_cimag(*scalar), __riscv_vsetvlmax_e32m2()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, bVector += vl, aVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m2(n); + vuint64m4_t va = __riscv_vle64_v_u64m4((const uint64_t*)bVector, vl); + vuint64m4_t vc = __riscv_vle64_v_u64m4((const uint64_t*)aVector, vl); + vfloat32m2_t var = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 0, vl)); + vfloat32m2_t vcr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 0, vl)); + vfloat32m2_t vai = __riscv_vreinterpret_f32m2(__riscv_vnsrl(va, 32, vl)); + vfloat32m2_t vci = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 32, vl)); + vfloat32m2_t vr = __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m2_t vi = __riscv_vfnmsac(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vuint32m2_t vru = __riscv_vreinterpret_u32m2(__riscv_vfadd(vr, vcr, vl)); + vuint32m2_t viu = __riscv_vreinterpret_u32m2(__riscv_vfadd(vi, vci, vl)); + vuint64m4_t v = + __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFFFFFF, viu, vl); + __riscv_vse64((uint64_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void +volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc_rvvseg(lv_32fc_t* cVector, + const lv_32fc_t* aVector, + const lv_32fc_t* bVector, + const lv_32fc_t* scalar, + unsigned int num_points) +{ + vfloat32m4_t vbr = + __riscv_vfmv_v_f_f32m4(lv_creal(*scalar), __riscv_vsetvlmax_e32m4()); + vfloat32m4_t vbi = + __riscv_vfmv_v_f_f32m4(lv_cimag(*scalar), __riscv_vsetvlmax_e32m4()); + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vc = 
__riscv_vlseg2e32_v_f32m4x2((const float*)aVector, vl); + vfloat32m4x2_t va = __riscv_vlseg2e32_v_f32m4x2((const float*)bVector, vl); + vfloat32m4_t vcr = __riscv_vget_f32m4(vc, 0), vci = __riscv_vget_f32m4(vc, 1); + vfloat32m4_t var = __riscv_vget_f32m4(va, 0), vai = __riscv_vget_f32m4(va, 1); + vfloat32m4_t vr = __riscv_vfmacc(__riscv_vfmul(var, vbr, vl), vai, vbi, vl); + vfloat32m4_t vi = __riscv_vfnmsac(__riscv_vfmul(var, vbi, vl), vai, vbr, vl); + vr = __riscv_vfadd(vr, vcr, vl); + vi = __riscv_vfadd(vi, vci, vl); + __riscv_vsseg2e32_v_f32m4x2( + (float*)cVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc_H */ diff --git a/kernels/volk/volk_32fc_x2_square_dist_32f.h b/kernels/volk/volk_32fc_x2_square_dist_32f.h index 4a93d5bf..b711bcf1 100644 --- a/kernels/volk/volk_32fc_x2_square_dist_32f.h +++ b/kernels/volk/volk_32fc_x2_square_dist_32f.h @@ -277,7 +277,7 @@ static inline void volk_32fc_x2_square_dist_32f_generic(float* target, float sq_dist; unsigned int i = 0; - for (; i> 3; ++i) { + for (; i < (num_bytes >> 3); ++i) { diff = src0[0] - points[i]; sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff); @@ -374,4 +374,56 @@ static inline void volk_32fc_x2_square_dist_32f_u_avx2(float* target, #endif /*LV_HAVE_AVX2*/ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32fc_x2_square_dist_32f_rvv(float* target, + lv_32fc_t* src0, + lv_32fc_t* points, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t var = __riscv_vfmv_v_f_f32m4(lv_creal(*src0), vlmax); + vfloat32m4_t vai = __riscv_vfmv_v_f_f32m4(lv_cimag(*src0), vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, target += vl, points += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint64m8_t vb = __riscv_vle64_v_u64m8((const uint64_t*)points, vl); + vfloat32m4_t vbr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 0, vl)); + vfloat32m4_t vbi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vb, 32, vl)); + vfloat32m4_t vr = __riscv_vfsub(var, vbr, vl); + vfloat32m4_t vi = __riscv_vfsub(vai, vbi, vl); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(target, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_32fc_x2_square_dist_32f_rvvseg(float* target, + lv_32fc_t* src0, + lv_32fc_t* points, + unsigned int num_points) +{ + size_t vlmax = __riscv_vsetvlmax_e32m4(); + vfloat32m4_t var = __riscv_vfmv_v_f_f32m4(lv_creal(*src0), vlmax); + vfloat32m4_t vai = __riscv_vfmv_v_f_f32m4(lv_cimag(*src0), vlmax); + + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, target += vl, points += vl) { + vl = __riscv_vsetvl_e32m4(n); + vfloat32m4x2_t vb = __riscv_vlseg2e32_v_f32m4x2((const float*)points, vl); + vfloat32m4_t vbr = __riscv_vget_f32m4(vb, 0); + vfloat32m4_t vbi = __riscv_vget_f32m4(vb, 1); + vfloat32m4_t vr = __riscv_vfsub(var, vbr, vl); + vfloat32m4_t vi = __riscv_vfsub(vai, vbi, vl); + vfloat32m4_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl); + __riscv_vse32(target, v, vl); + } +} +#endif /*LV_HAVE_RVVSEG*/ + #endif /*INCLUDED_volk_32fc_x2_square_dist_32f_u_H*/ diff --git a/kernels/volk/volk_32i_s32f_convert_32f.h b/kernels/volk/volk_32i_s32f_convert_32f.h index 678290fc..749cb1af 100644 --- a/kernels/volk/volk_32i_s32f_convert_32f.h +++ b/kernels/volk/volk_32i_s32f_convert_32f.h @@ -313,5 +313,21 @@ static inline void volk_32i_s32f_convert_32f_a_sse2(float* 
outputVector, } #endif /* LV_HAVE_SSE2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32i_s32f_convert_32f_rvv(float* outputVector, + const int32_t* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vfloat32m8_t v = __riscv_vfcvt_f(__riscv_vle32_v_i32m8(inputVector, vl), vl); + __riscv_vse32(outputVector, __riscv_vfmul(v, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */ diff --git a/kernels/volk/volk_32i_x2_and_32i.h b/kernels/volk/volk_32i_x2_and_32i.h index d2bcf6b8..79e4f221 100644 --- a/kernels/volk/volk_32i_x2_and_32i.h +++ b/kernels/volk/volk_32i_x2_and_32i.h @@ -337,5 +337,22 @@ static inline void volk_32i_x2_and_32i_u_avx2(int32_t* cVector, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32i_x2_and_32i_rvv(int32_t* cVector, + const int32_t* aVector, + const int32_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vint32m8_t va = __riscv_vle32_v_i32m8(aVector, vl); + vint32m8_t vb = __riscv_vle32_v_i32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vand(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32i_x2_and_32i_u_H */ diff --git a/kernels/volk/volk_32i_x2_or_32i.h b/kernels/volk/volk_32i_x2_or_32i.h index f3e4b769..3642f13d 100644 --- a/kernels/volk/volk_32i_x2_or_32i.h +++ b/kernels/volk/volk_32i_x2_or_32i.h @@ -336,5 +336,22 @@ static inline void volk_32i_x2_or_32i_u_avx2(int32_t* cVector, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32i_x2_or_32i_rvv(int32_t* cVector, + const int32_t* aVector, + const int32_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e32m8(n); + vint32m8_t va = __riscv_vle32_v_i32m8(aVector, vl); + vint32m8_t vb = __riscv_vle32_v_i32m8(bVector, vl); + __riscv_vse32(cVector, __riscv_vor(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32i_x2_or_32i_u_H */ diff --git a/kernels/volk/volk_32u_byteswap.h b/kernels/volk/volk_32u_byteswap.h index a6ec86f8..d5d0613e 100644 --- a/kernels/volk/volk_32u_byteswap.h +++ b/kernels/volk/volk_32u_byteswap.h @@ -343,5 +343,53 @@ static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int n } #endif /* LV_HAVE_SSE2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32u_byteswap_rvv(uint32_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + size_t vlmax = __riscv_vsetvlmax_e8m1(); + if (vlmax <= 256) { + vuint8m1_t vidx = __riscv_vreinterpret_u8m1( + __riscv_vsub(__riscv_vreinterpret_u32m1(__riscv_vid_v_u8m1(vlmax)), + 0x3020100 - 0x10203, + vlmax / 4)); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle32_v_u32m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgather, v, vidx); + __riscv_vse32(intsToSwap, __riscv_vreinterpret_u32m8(v), vl); + } + } else { + vuint16m2_t vidx = __riscv_vreinterpret_u16m2( + __riscv_vsub(__riscv_vreinterpret_u64m2(__riscv_vid_v_u16m2(vlmax)), + 0x3000200010000 - 0x100020003, + vlmax / 4)); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = 
__riscv_vsetvl_e32m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle32_v_u32m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgatherei16, v, vidx); + __riscv_vse32(intsToSwap, __riscv_vreinterpret_u32m8(v), vl); + } + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA23 +#include + +static inline void volk_32u_byteswap_rva23(uint32_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint32m8_t v = __riscv_vle32_v_u32m8(intsToSwap, vl); + __riscv_vse32(intsToSwap, __riscv_vrev8(v, vl), vl); + } +} +#endif /* LV_HAVE_RVA23 */ #endif /* INCLUDED_volk_32u_byteswap_a_H */ diff --git a/kernels/volk/volk_32u_byteswappuppet_32u.h b/kernels/volk/volk_32u_byteswappuppet_32u.h index a6ef921f..4ad3deac 100644 --- a/kernels/volk/volk_32u_byteswappuppet_32u.h +++ b/kernels/volk/volk_32u_byteswappuppet_32u.h @@ -91,4 +91,26 @@ static inline void volk_32u_byteswappuppet_32u_a_avx2(uint32_t* output, } #endif +#ifdef LV_HAVE_RVV +static inline void volk_32u_byteswappuppet_32u_rvv(uint32_t* output, + uint32_t* intsToSwap, + unsigned int num_points) +{ + + volk_32u_byteswap_rvv((uint32_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t)); +} +#endif + +#ifdef LV_HAVE_RVA23 +static inline void volk_32u_byteswappuppet_32u_rva23(uint32_t* output, + uint32_t* intsToSwap, + unsigned int num_points) +{ + + volk_32u_byteswap_rva23((uint32_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t)); +} +#endif + #endif diff --git a/kernels/volk/volk_32u_popcnt.h b/kernels/volk/volk_32u_popcnt.h index b8c371fb..3ad2f0aa 100644 --- a/kernels/volk/volk_32u_popcnt.h +++ b/kernels/volk/volk_32u_popcnt.h @@ -76,4 +76,22 @@ static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value) #endif /*LV_HAVE_SSE4_2*/ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_32u_popcnt_rvv(uint32_t* ret, const uint32_t value) +{ + *ret = __riscv_vcpop(__riscv_vreinterpret_b4(__riscv_vmv_s_x_u64m1(value, 1)), 32); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVA22V +#include + +static inline void volk_32u_popcnt_rva22(uint32_t* ret, const uint32_t value) +{ + *ret = __riscv_cpop_32(value); +} +#endif /*LV_HAVE_RVA22V*/ + #endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/ diff --git a/kernels/volk/volk_32u_popcntpuppet_32u.h b/kernels/volk/volk_32u_popcntpuppet_32u.h index 19a17f56..b808eb00 100644 --- a/kernels/volk/volk_32u_popcntpuppet_32u.h +++ b/kernels/volk/volk_32u_popcntpuppet_32u.h @@ -18,9 +18,8 @@ static inline void volk_32u_popcntpuppet_32u_generic(uint32_t* outVector, const uint32_t* inVector, unsigned int num_points) { - unsigned int ii; - for (ii = 0; ii < num_points; ++ii) { - volk_32u_popcnt_generic(outVector + ii, *(inVector + ii)); + for (size_t i = 0; i < num_points; ++i) { + volk_32u_popcnt_generic(outVector + i, inVector[i]); } } #endif /* LV_HAVE_GENERIC */ @@ -30,11 +29,32 @@ static inline void volk_32u_popcntpuppet_32u_a_sse4_2(uint32_t* outVector, const uint32_t* inVector, unsigned int num_points) { - unsigned int ii; - for (ii = 0; ii < num_points; ++ii) { - volk_32u_popcnt_a_sse4_2(outVector + ii, *(inVector + ii)); + for (size_t i = 0; i < num_points; ++i) { + volk_32u_popcnt_a_sse4_2(outVector + i, inVector[i]); } } #endif /* LV_HAVE_SSE4_2 */ +#ifdef LV_HAVE_RVV +static inline void volk_32u_popcntpuppet_32u_rvv(uint32_t* outVector, + const uint32_t* inVector, + unsigned int num_points) +{ 
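+    // Puppet wrapper: apply the single-word popcnt kernel element by element so
+    // the test harness can exercise it through the usual kernel interface.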
+ for (size_t i = 0; i < num_points; ++i) { + volk_32u_popcnt_rvv(outVector + i, inVector[i]); + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA22V +static inline void volk_32u_popcntpuppet_32u_rva22(uint32_t* outVector, + const uint32_t* inVector, + unsigned int num_points) +{ + for (size_t i = 0; i < num_points; ++i) { + volk_32u_popcnt_rva22(outVector + i, inVector[i]); + } +} +#endif /* LV_HAVE_RVA22V */ + #endif /* INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H */ diff --git a/kernels/volk/volk_32u_reverse_32u.h b/kernels/volk/volk_32u_reverse_32u.h index 62150ac6..ece8f48b 100644 --- a/kernels/volk/volk_32u_reverse_32u.h +++ b/kernels/volk/volk_32u_reverse_32u.h @@ -337,4 +337,57 @@ volk_32u_reverse_32u_arm(uint32_t* out, const uint32_t* in, unsigned int num_poi #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +#include + +static inline void +volk_32u_reverse_32u_rvv(uint32_t* out, const uint32_t* in, unsigned int num_points) +{ + size_t n = num_points; + + static const uint64_t tblLo[] = { + 0xE060A020C0408000, + 0xF070B030D0509010, + }; + static const uint64_t tblHi[] = { + 0x0E060A020C040800, + 0x0F070B030D050901, + }; + vuint8m1_t vtblLo = __riscv_vreinterpret_u8m1(__riscv_vle64_v_u64m1(tblLo, 2)); + vuint8m1_t vtblHi = __riscv_vreinterpret_u8m1(__riscv_vle64_v_u64m1(tblHi, 2)); + + size_t vlmax = __riscv_vsetvlmax_e8m1(); + vuint16m2_t vidx = __riscv_vreinterpret_u16m2( + __riscv_vsub(__riscv_vreinterpret_u64m2(__riscv_vid_v_u16m2(vlmax)), + 0x3000200010000 - 0x100020003, + vlmax / 4)); + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m4(n); + vuint8m4_t v = __riscv_vreinterpret_u8m4(__riscv_vle32_v_u32m4(in, vl)); + v = RISCV_PERM4(__riscv_vrgatherei16, v, vidx); + vuint8m4_t lo = __riscv_vand(v, 0xF, vl * 4); + lo = RISCV_LUT4(__riscv_vrgather, vtblLo, lo); + vuint8m4_t hi = __riscv_vsrl(v, 4, vl * 4); + hi = RISCV_LUT4(__riscv_vrgather, vtblHi, hi); + v = __riscv_vor(hi, lo, vl * 4); + __riscv_vse32(out, __riscv_vreinterpret_u32m4(v), vl); + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA23 +#include + +static inline void +volk_32u_reverse_32u_rva23(uint32_t* out, const uint32_t* in, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, out += vl) { + vl = __riscv_vsetvl_e32m8(n); + vuint32m8_t v = __riscv_vle32_v_u32m8(in, vl); + __riscv_vse32(out, __riscv_vbrev(v, vl), vl); + } +} +#endif /* LV_HAVE_RVA23 */ + #endif /* INCLUDED_volk_32u_reverse_32u_u_H */ diff --git a/kernels/volk/volk_64f_convert_32f.h b/kernels/volk/volk_64f_convert_32f.h index b5f9b507..67f6ae48 100644 --- a/kernels/volk/volk_64f_convert_32f.h +++ b/kernels/volk/volk_64f_convert_32f.h @@ -315,5 +315,20 @@ static inline void volk_64f_convert_32f_a_sse2(float* outputVector, } #endif /* LV_HAVE_SSE2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64f_convert_32f_rvv(float* outputVector, + const double* inputVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t v = __riscv_vle64_v_f64m8(inputVector, vl); + __riscv_vse32(outputVector, __riscv_vfncvt_f(v, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_64f_convert_32f_a_H */ diff --git a/kernels/volk/volk_64f_x2_add_64f.h b/kernels/volk/volk_64f_x2_add_64f.h index 867a5d3b..bf9024e8 100644 --- a/kernels/volk/volk_64f_x2_add_64f.h +++ b/kernels/volk/volk_64f_x2_add_64f.h @@ -244,4 +244,22 @@ static inline void 
volk_64f_x2_add_64f_a_avx(double* cVector, #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64f_x2_add_64f_rvv(double* cVector, + const double* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vle64_v_f64m8(aVector, vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfadd(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_64f_x2_add_64f_u_H */ diff --git a/kernels/volk/volk_64f_x2_max_64f.h b/kernels/volk/volk_64f_x2_max_64f.h index 973605c7..e9ca3ef6 100644 --- a/kernels/volk/volk_64f_x2_max_64f.h +++ b/kernels/volk/volk_64f_x2_max_64f.h @@ -290,5 +290,22 @@ static inline void volk_64f_x2_max_64f_u_avx(double* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64f_x2_max_64f_rvv(double* cVector, + const double* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vle64_v_f64m8(aVector, vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfmax(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_64f_x2_max_64f_u_H */ diff --git a/kernels/volk/volk_64f_x2_min_64f.h b/kernels/volk/volk_64f_x2_min_64f.h index 970b843f..7652ef72 100644 --- a/kernels/volk/volk_64f_x2_min_64f.h +++ b/kernels/volk/volk_64f_x2_min_64f.h @@ -290,5 +290,22 @@ static inline void volk_64f_x2_min_64f_u_avx(double* cVector, } #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64f_x2_min_64f_rvv(double* cVector, + const double* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vle64_v_f64m8(aVector, vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfmin(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_64f_x2_min_64f_u_H */ diff --git a/kernels/volk/volk_64f_x2_multiply_64f.h b/kernels/volk/volk_64f_x2_multiply_64f.h index caab3aaa..57eb468a 100644 --- a/kernels/volk/volk_64f_x2_multiply_64f.h +++ b/kernels/volk/volk_64f_x2_multiply_64f.h @@ -244,4 +244,22 @@ static inline void volk_64f_x2_multiply_64f_a_avx(double* cVector, #endif /* LV_HAVE_AVX */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64f_x2_multiply_64f_rvv(double* cVector, + const double* aVector, + const double* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e64m8(n); + vfloat64m8_t va = __riscv_vle64_v_f64m8(aVector, vl); + vfloat64m8_t vb = __riscv_vle64_v_f64m8(bVector, vl); + __riscv_vse64(cVector, __riscv_vfmul(va, vb, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_64f_x2_multiply_64f_u_H */ diff --git a/kernels/volk/volk_64u_byteswap.h b/kernels/volk/volk_64u_byteswap.h index 2fbf3cce..a8da031c 100644 --- a/kernels/volk/volk_64u_byteswap.h +++ b/kernels/volk/volk_64u_byteswap.h @@ -383,4 +383,53 @@ static inline void volk_64u_byteswap_u_ssse3(uint64_t* intsToSwap, #endif /* 
LV_HAVE_SSSE3 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64u_byteswap_rvv(uint64_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + size_t vlmax = __riscv_vsetvlmax_e8m1(); + if (vlmax <= 256) { + vuint8m1_t vidx = __riscv_vreinterpret_u8m1( + __riscv_vsub(__riscv_vreinterpret_u64m1(__riscv_vid_v_u8m1(vlmax)), + 0x0706050403020100 - 0x1020304050607, + vlmax / 8)); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle64_v_u64m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgather, v, vidx); + __riscv_vse64(intsToSwap, __riscv_vreinterpret_u64m8(v), vl); + } + } else { + vuint16m2_t vid = __riscv_vid_v_u16m2(vlmax); + vuint16m2_t voff1 = __riscv_vand(vid, 0x7, vlmax); + vuint16m2_t voff2 = __riscv_vrsub(voff1, 0x7, vlmax); + vuint16m2_t vidx = __riscv_vadd(__riscv_vsub(vid, voff1, vlmax), voff2, vlmax); + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint8m8_t v = + __riscv_vreinterpret_u8m8(__riscv_vle64_v_u64m8(intsToSwap, vl)); + v = RISCV_PERM8(__riscv_vrgatherei16, v, vidx); + __riscv_vse64(intsToSwap, __riscv_vreinterpret_u64m8(v), vl); + } + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA23 +#include + +static inline void volk_64u_byteswap_rva23(uint64_t* intsToSwap, unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, intsToSwap += vl) { + vl = __riscv_vsetvl_e64m8(n); + vuint64m8_t v = __riscv_vle64_v_u64m8(intsToSwap, vl); + __riscv_vse64(intsToSwap, __riscv_vrev8(v, vl), vl); + } +} +#endif /* LV_HAVE_RVA23 */ + #endif /* INCLUDED_volk_64u_byteswap_a_H */ diff --git a/kernels/volk/volk_64u_byteswappuppet_64u.h b/kernels/volk/volk_64u_byteswappuppet_64u.h index c2b55bf4..2be3b0b7 100644 --- a/kernels/volk/volk_64u_byteswappuppet_64u.h +++ b/kernels/volk/volk_64u_byteswappuppet_64u.h @@ -92,4 +92,26 @@ static inline void volk_64u_byteswappuppet_64u_a_avx2(uint64_t* output, } #endif +#ifdef LV_HAVE_RVV +static inline void volk_64u_byteswappuppet_64u_rvv(uint64_t* output, + uint64_t* intsToSwap, + unsigned int num_points) +{ + + volk_64u_byteswap_rvv((uint64_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t)); +} +#endif + +#ifdef LV_HAVE_RVA23 +static inline void volk_64u_byteswappuppet_64u_rva23(uint64_t* output, + uint64_t* intsToSwap, + unsigned int num_points) +{ + + volk_64u_byteswap_rva23((uint64_t*)intsToSwap, num_points); + memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t)); +} +#endif + #endif diff --git a/kernels/volk/volk_64u_popcnt.h b/kernels/volk/volk_64u_popcnt.h index 5c9b2a3a..fb12bbe1 100644 --- a/kernels/volk/volk_64u_popcnt.h +++ b/kernels/volk/volk_64u_popcnt.h @@ -116,5 +116,22 @@ static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) } #endif /*LV_HAVE_NEON*/ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_64u_popcnt_rvv(uint64_t* ret, const uint64_t value) +{ + *ret = __riscv_vcpop(__riscv_vreinterpret_b2(__riscv_vmv_s_x_u64m1(value, 1)), 64); +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVA22V +#include + +static inline void volk_64u_popcnt_rva22(uint64_t* ret, const uint64_t value) +{ + *ret = __riscv_cpop_64(value); +} +#endif /*LV_HAVE_RVA22V*/ #endif /*INCLUDED_volk_64u_popcnt_a_H*/ diff --git a/kernels/volk/volk_64u_popcntpuppet_64u.h b/kernels/volk/volk_64u_popcntpuppet_64u.h index 300d4fd1..245aeba1 100644 --- 
a/kernels/volk/volk_64u_popcntpuppet_64u.h +++ b/kernels/volk/volk_64u_popcntpuppet_64u.h @@ -19,11 +19,9 @@ static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points) { - unsigned int ii; - for (ii = 0; ii < num_points; ++ii) { - volk_64u_popcnt_generic(outVector + ii, num_points); + for (size_t i = 0; i < num_points; ++i) { + volk_64u_popcnt_generic(outVector + i, inVector[i]); } - memcpy((void*)outVector, (void*)inVector, num_points * sizeof(uint64_t)); } #endif /* LV_HAVE_GENERIC */ @@ -32,11 +30,9 @@ static inline void volk_64u_popcntpuppet_64u_a_sse4_2(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points) { - unsigned int ii; - for (ii = 0; ii < num_points; ++ii) { - volk_64u_popcnt_a_sse4_2(outVector + ii, num_points); + for (size_t i = 0; i < num_points; ++i) { + volk_64u_popcnt_a_sse4_2(outVector + i, inVector[i]); } - memcpy((void*)outVector, (void*)inVector, num_points * sizeof(uint64_t)); } #endif /* LV_HAVE_SSE4_2 */ @@ -45,12 +41,32 @@ static inline void volk_64u_popcntpuppet_64u_neon(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points) { - unsigned int ii; - for (ii = 0; ii < num_points; ++ii) { - volk_64u_popcnt_neon(outVector + ii, num_points); + for (size_t i = 0; i < num_points; ++i) { + volk_64u_popcnt_neon(outVector + i, inVector[i]); } - memcpy((void*)outVector, (void*)inVector, num_points * sizeof(uint64_t)); } #endif /* LV_HAVE_NEON */ +#ifdef LV_HAVE_RVV +static inline void volk_64u_popcntpuppet_64u_rvv(uint64_t* outVector, + const uint64_t* inVector, + unsigned int num_points) +{ + for (size_t i = 0; i < num_points; ++i) { + volk_64u_popcnt_rvv(outVector + i, inVector[i]); + } +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVA22V +static inline void volk_64u_popcntpuppet_64u_rva22(uint64_t* outVector, + const uint64_t* inVector, + unsigned int num_points) +{ + for (size_t i = 0; i < num_points; ++i) { + volk_64u_popcnt_rva22(outVector + i, inVector[i]); + } +} +#endif /* LV_HAVE_RVA22V */ + #endif /* INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H */ diff --git a/kernels/volk/volk_8i_convert_16i.h b/kernels/volk/volk_8i_convert_16i.h index 36e929bb..0800f7c5 100644 --- a/kernels/volk/volk_8i_convert_16i.h +++ b/kernels/volk/volk_8i_convert_16i.h @@ -266,5 +266,20 @@ static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector, } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8i_convert_16i_rvv(int16_t* outputVector, + const int8_t* inputVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) { + vl = __riscv_vsetvl_e8m4(n); + vint16m8_t v = __riscv_vsext_vf2(__riscv_vle8_v_i8m4(inputVector, vl), vl); + __riscv_vse16(outputVector, __riscv_vsll(v, 8, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED8_H */ diff --git a/kernels/volk/volk_8i_s32f_convert_32f.h b/kernels/volk/volk_8i_s32f_convert_32f.h index d904d25d..cd2c325e 100644 --- a/kernels/volk/volk_8i_s32f_convert_32f.h +++ b/kernels/volk/volk_8i_s32f_convert_32f.h @@ -350,5 +350,22 @@ static inline void volk_8i_s32f_convert_32f_u_orc(float* outputVector, } #endif /* LV_HAVE_ORC */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8i_s32f_convert_32f_rvv(float* outputVector, + const int8_t* inputVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += 
vl) { + vl = __riscv_vsetvl_e8m2(n); + vint16m4_t v = __riscv_vsext_vf2(__riscv_vle8_v_i8m2(inputVector, vl), vl); + __riscv_vse32( + outputVector, __riscv_vfmul(__riscv_vfwcvt_f(v, vl), 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED8_H */ diff --git a/kernels/volk/volk_8ic_deinterleave_16i_x2.h b/kernels/volk/volk_8ic_deinterleave_16i_x2.h index 46b2e2e4..87d745b8 100644 --- a/kernels/volk/volk_8ic_deinterleave_16i_x2.h +++ b/kernels/volk/volk_8ic_deinterleave_16i_x2.h @@ -392,4 +392,26 @@ static inline void volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer, } } #endif /* LV_HAVE_AVX2 */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_deinterleave_16i_x2_rvv(int16_t* iBuffer, + int16_t* qBuffer, + const lv_8sc_t* complexVector, + unsigned int num_points) +{ + const uint16_t* in = (const uint16_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m8(n); + vuint16m8_t vc = __riscv_vle16_v_u16m8(in, vl); + vuint16m8_t vr = __riscv_vsll(vc, 8, vl); + vuint16m8_t vi = __riscv_vand(vc, 0xFF00, vl); + __riscv_vse16((uint16_t*)iBuffer, vr, vl); + __riscv_vse16((uint16_t*)qBuffer, vi, vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_u_H */ diff --git a/kernels/volk/volk_8ic_deinterleave_real_16i.h b/kernels/volk/volk_8ic_deinterleave_real_16i.h index bef47592..8814e5e1 100644 --- a/kernels/volk/volk_8ic_deinterleave_real_16i.h +++ b/kernels/volk/volk_8ic_deinterleave_real_16i.h @@ -300,4 +300,22 @@ static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer, } } #endif /* LV_HAVE_AVX2 */ + +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_deinterleave_real_16i_rvv(int16_t* iBuffer, + const lv_8sc_t* complexVector, + unsigned int num_points) +{ + const int16_t* in = (const int16_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e16m8(n); + vint16m8_t v = __riscv_vle16_v_i16m8(in, vl); + __riscv_vse16(iBuffer, __riscv_vsra(__riscv_vsll(v, 8, vl), 1, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */ diff --git a/kernels/volk/volk_8ic_deinterleave_real_8i.h b/kernels/volk/volk_8ic_deinterleave_real_8i.h index 116b1afb..2c409c69 100644 --- a/kernels/volk/volk_8ic_deinterleave_real_8i.h +++ b/kernels/volk/volk_8ic_deinterleave_real_8i.h @@ -402,4 +402,21 @@ static inline void volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_deinterleave_real_8i_rvv(int8_t* iBuffer, + const lv_8sc_t* complexVector, + unsigned int num_points) +{ + const uint16_t* in = (const uint16_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e16m8(n); + vuint16m8_t vc = __riscv_vle16_v_u16m8(in, vl); + __riscv_vse8((uint8_t*)iBuffer, __riscv_vnsrl(vc, 0, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H */ diff --git a/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h b/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h index 8936a169..e0234b16 100644 --- a/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h +++ b/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h @@ -441,4 +441,28 @@ static inline void volk_8ic_s32f_deinterleave_32f_x2_u_avx2(float* iBuffer, } #endif /* 
LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_s32f_deinterleave_32f_x2_rvv(float* iBuffer, + float* qBuffer, + const lv_8sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + const uint16_t* in = (const uint16_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl, qBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vuint16m4_t vc = __riscv_vle16_v_u16m4(in, vl); + vint8m2_t vr = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 0, vl)); + vint8m2_t vi = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 8, vl)); + vfloat32m8_t vrf = __riscv_vfwcvt_f(__riscv_vsext_vf2(vr, vl), vl); + vfloat32m8_t vif = __riscv_vfwcvt_f(__riscv_vsext_vf2(vi, vl), vl); + __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl); + __riscv_vse32(qBuffer, __riscv_vfmul(vif, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ + #endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H */ diff --git a/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h b/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h index 37cb2555..7ec8958d 100644 --- a/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h +++ b/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h @@ -349,5 +349,24 @@ volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_s32f_deinterleave_real_32f_rvv(float* iBuffer, + const lv_8sc_t* complexVector, + const float scalar, + unsigned int num_points) +{ + const uint16_t* in = (const uint16_t*)complexVector; + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) { + vl = __riscv_vsetvl_e16m4(n); + vuint16m4_t vc = __riscv_vle16_v_u16m4(in, vl); + vint8m2_t vr = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 0, vl)); + vfloat32m8_t vrf = __riscv_vfwcvt_f(__riscv_vsext_vf2(vr, vl), vl); + __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl); + } +} +#endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */ diff --git a/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h b/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h index 5462ea67..5de0e312 100644 --- a/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h +++ b/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h @@ -274,4 +274,55 @@ static inline void volk_8ic_x2_multiply_conjugate_16ic_u_avx2(lv_16sc_t* cVector } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_x2_multiply_conjugate_16ic_rvv(lv_16sc_t* cVector, + const lv_8sc_t* aVector, + const lv_8sc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e8m2(n); + vint16m4_t va = __riscv_vle16_v_i16m4((const int16_t*)aVector, vl); + vint16m4_t vb = __riscv_vle16_v_i16m4((const int16_t*)bVector, vl); + vint8m2_t var = __riscv_vnsra(va, 0, vl), vai = __riscv_vnsra(va, 8, vl); + vint8m2_t vbr = __riscv_vnsra(vb, 0, vl), vbi = __riscv_vnsra(vb, 8, vl); + vint16m4_t vr = __riscv_vwmacc(__riscv_vwmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = + __riscv_vsub(__riscv_vwmul(vai, vbr, vl), __riscv_vwmul(var, vbi, vl), vl); + vuint16m4_t vru = __riscv_vreinterpret_u16m4(vr); + vuint16m4_t viu = __riscv_vreinterpret_u16m4(vi); + vuint32m8_t v = __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFF, viu, vl); + __riscv_vse32((uint32_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + 
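+/* Segmented variant: vlseg2e8 splits the interleaved 8-bit I/Q on load, the
+ * products are widened to 16 bit, and vsseg2e16 re-interleaves on store. */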
+static inline void volk_8ic_x2_multiply_conjugate_16ic_rvvseg(lv_16sc_t* cVector, + const lv_8sc_t* aVector, + const lv_8sc_t* bVector, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e8m2(n); + vint8m2x2_t va = __riscv_vlseg2e8_v_i8m2x2((const int8_t*)aVector, vl); + vint8m2x2_t vb = __riscv_vlseg2e8_v_i8m2x2((const int8_t*)bVector, vl); + vint8m2_t var = __riscv_vget_i8m2(va, 0), vai = __riscv_vget_i8m2(va, 1); + vint8m2_t vbr = __riscv_vget_i8m2(vb, 0), vbi = __riscv_vget_i8m2(vb, 1); + vint16m4_t vr = __riscv_vwmacc(__riscv_vwmul(var, vbr, vl), vai, vbi, vl); + vint16m4_t vi = + __riscv_vsub(__riscv_vwmul(vai, vbr, vl), __riscv_vwmul(var, vbi, vl), vl); + __riscv_vsseg2e16_v_i16m4x2( + (int16_t*)cVector, __riscv_vcreate_v_i16m4x2(vr, vi), vl); + } +} + +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_u_H */ diff --git a/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h b/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h index 318a7819..5316ada0 100644 --- a/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h +++ b/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h @@ -341,4 +341,63 @@ volk_8ic_x2_s32f_multiply_conjugate_32fc_u_avx2(lv_32fc_t* cVector, #endif /* LV_HAVE_AVX2*/ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_rvv(lv_32fc_t* cVector, + const lv_8sc_t* aVector, + const lv_8sc_t* bVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e8m1(n); + vint16m2_t va = __riscv_vle16_v_i16m2((const int16_t*)aVector, vl); + vint16m2_t vb = __riscv_vle16_v_i16m2((const int16_t*)bVector, vl); + vint8m1_t var = __riscv_vnsra(va, 0, vl), vai = __riscv_vnsra(va, 8, vl); + vint8m1_t vbr = __riscv_vnsra(vb, 0, vl), vbi = __riscv_vnsra(vb, 8, vl); + vint16m2_t vr = __riscv_vwmacc(__riscv_vwmul(var, vbr, vl), vai, vbi, vl); + vint16m2_t vi = + __riscv_vsub(__riscv_vwmul(vai, vbr, vl), __riscv_vwmul(var, vbi, vl), vl); + vfloat32m4_t vrf = __riscv_vfmul(__riscv_vfwcvt_f(vr, vl), 1.0 / scalar, vl); + vfloat32m4_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), 1.0 / scalar, vl); + vuint32m4_t vru = __riscv_vreinterpret_u32m4(vrf); + vuint32m4_t viu = __riscv_vreinterpret_u32m4(vif); + vuint64m8_t v = + __riscv_vwmaccu(__riscv_vwaddu_vv(vru, viu, vl), 0xFFFFFFFF, viu, vl); + __riscv_vse64((uint64_t*)cVector, v, vl); + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void +volk_8ic_x2_s32f_multiply_conjugate_32fc_rvvseg(lv_32fc_t* cVector, + const lv_8sc_t* aVector, + const lv_8sc_t* bVector, + const float scalar, + unsigned int num_points) +{ + size_t n = num_points; + for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) { + vl = __riscv_vsetvl_e8m1(n); + vint8m1x2_t va = __riscv_vlseg2e8_v_i8m1x2((const int8_t*)aVector, vl); + vint8m1x2_t vb = __riscv_vlseg2e8_v_i8m1x2((const int8_t*)bVector, vl); + vint8m1_t var = __riscv_vget_i8m1(va, 0), vai = __riscv_vget_i8m1(va, 1); + vint8m1_t vbr = __riscv_vget_i8m1(vb, 0), vbi = __riscv_vget_i8m1(vb, 1); + vint16m2_t vr = __riscv_vwmacc(__riscv_vwmul(var, vbr, vl), vai, vbi, vl); + vint16m2_t vi = + __riscv_vsub(__riscv_vwmul(vai, vbr, vl), __riscv_vwmul(var, vbi, vl), vl); + vfloat32m4_t vrf = __riscv_vfmul(__riscv_vfwcvt_f(vr, vl), 1.0 / scalar, vl); + 
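+        // The imaginary part gets the same 1.0 / scalar scaling as the real part,
+        // trading a per-element division for a multiply by the reciprocal.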
vfloat32m4_t vif = __riscv_vfmul(__riscv_vfwcvt_f(vi, vl), 1.0 / scalar, vl); + __riscv_vsseg2e32_v_f32m4x2( + (float*)cVector, __riscv_vcreate_v_f32m4x2(vrf, vif), vl); + } +} + +#endif /*LV_HAVE_RVVSEG*/ + #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H */ diff --git a/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h b/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h index 51963efd..5314622b 100644 --- a/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h +++ b/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h @@ -20,11 +20,14 @@ typedef union { unsigned int* w; } p_decision_t; -static inline int parity(int x, unsigned char* Partab) +static inline int parity(int x) { - x ^= (x >> 16); - x ^= (x >> 8); - return Partab[x]; + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + x ^= x >> 2; + x ^= x >> 1; + return x & 1; } static inline int chainback_viterbi(unsigned char* data, @@ -113,7 +116,6 @@ static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec, static unsigned char* X; static unsigned int excess = 6; static unsigned char* Branchtab; - static unsigned char Partab[256]; int d_polys[2] = { 79, 109 }; @@ -127,24 +129,12 @@ static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec, D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6), volk_get_alignment()); int state, i; - int cnt, ti; - - /* Initialize parity lookup table */ - for (i = 0; i < 256; i++) { - cnt = 0; - ti = i; - while (ti) { - if (ti & 1) - cnt++; - ti >>= 1; - } - Partab[i] = cnt & 1; - } + /* Initialize the branch table */ for (state = 0; state < d_numstates / 2; state++) { for (i = 0; i < rate; i++) { Branchtab[i * d_numstates / 2 + state] = - parity((2 * state) & d_polys[i], Partab) ? 255 : 0; + parity((2 * state) & d_polys[i]) ? 255 : 0; } } @@ -195,7 +185,6 @@ static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec, static unsigned char* X; static unsigned int excess = 6; static unsigned char* Branchtab; - static unsigned char Partab[256]; int d_polys[2] = { 79, 109 }; @@ -209,24 +198,12 @@ static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec, D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6), volk_get_alignment()); int state, i; - int cnt, ti; - - /* Initialize parity lookup table */ - for (i = 0; i < 256; i++) { - cnt = 0; - ti = i; - while (ti) { - if (ti & 1) - cnt++; - ti >>= 1; - } - Partab[i] = cnt & 1; - } + /* Initialize the branch table */ for (state = 0; state < d_numstates / 2; state++) { for (i = 0; i < rate; i++) { Branchtab[i * d_numstates / 2 + state] = - parity((2 * state) & d_polys[i], Partab) ? 255 : 0; + parity((2 * state) & d_polys[i]) ? 255 : 0; } } @@ -280,7 +257,6 @@ static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec, static unsigned char* X; static unsigned int excess = 6; static unsigned char* Branchtab; - static unsigned char Partab[256]; int d_polys[2] = { 79, 109 }; @@ -294,24 +270,12 @@ static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec, D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6), volk_get_alignment()); int state, i; - int cnt, ti; - - /* Initialize parity lookup table */ - for (i = 0; i < 256; i++) { - cnt = 0; - ti = i; - while (ti) { - if (ti & 1) - cnt++; - ti >>= 1; - } - Partab[i] = cnt & 1; - } + /* Initialize the branch table */ for (state = 0; state < d_numstates / 2; state++) { for (i = 0; i < rate; i++) { Branchtab[i * d_numstates / 2 + state] = - parity((2 * state) & d_polys[i], Partab) ? 
255 : 0; + parity((2 * state) & d_polys[i]) ? 255 : 0; } } @@ -363,7 +327,6 @@ static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec, static unsigned char* D; static unsigned int excess = 6; static unsigned char* Branchtab; - static unsigned char Partab[256]; int d_polys[2] = { 79, 109 }; @@ -378,24 +341,12 @@ static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec, volk_get_alignment()); int state, i; - int cnt, ti; - - /* Initialize parity lookup table */ - for (i = 0; i < 256; i++) { - cnt = 0; - ti = i; - while (ti) { - if (ti & 1) - cnt++; - ti >>= 1; - } - Partab[i] = cnt & 1; - } + /* Initialize the branch table */ for (state = 0; state < d_numstates / 2; state++) { for (i = 0; i < rate; i++) { Branchtab[i * d_numstates / 2 + state] = - parity((2 * state) & d_polys[i], Partab) ? 255 : 0; + parity((2 * state) & d_polys[i]) ? 255 : 0; } } @@ -427,4 +378,59 @@ static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec, #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_RVV +#include + +static inline void volk_8u_conv_k7_r2puppet_8u_rvv(unsigned char* dec, + unsigned char* syms, + unsigned int framebits) +{ + if (framebits < 12) + return; + + int d_numstates = (1 << 6); + static unsigned char* D; + static unsigned char* Y; + static unsigned char* X; + static unsigned int excess = 6; + static unsigned char* Branchtab; + + static int once = 1; + if (once) { + once = 0; + + X = (unsigned char*)volk_malloc(3 * d_numstates, volk_get_alignment()); + Y = X + d_numstates; + Branchtab = Y + d_numstates; + D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6), + volk_get_alignment()); + + /* Initialize the branch table */ + for (size_t state = 0; state < d_numstates / 2; state++) { + Branchtab[state] = parity(state & 39) * 255; + Branchtab[state + d_numstates / 2] = parity(state & 54) * 255; + } + } + + memset(X, 31, d_numstates); // unbias the old_metrics + memset(D, 0, (d_numstates / 8) * (framebits + 6)); // initialize decisions + + volk_8u_x4_conv_k7_r2_8u_rvv( + Y, X, syms, D, framebits / 2 - excess, excess, Branchtab); + + unsigned int min = X[0]; + int i = 0, state = 0; + for (i = 0; i < d_numstates; ++i) { + if (X[i] < min) { + min = X[i]; + state = i; + } + } + + chainback_viterbi(dec, framebits / 2 - excess, state, excess, D); + + return; +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/ diff --git a/kernels/volk/volk_8u_x2_encodeframepolar_8u.h b/kernels/volk/volk_8u_x2_encodeframepolar_8u.h index 1464218a..5d03f03d 100644 --- a/kernels/volk/volk_8u_x2_encodeframepolar_8u.h +++ b/kernels/volk/volk_8u_x2_encodeframepolar_8u.h @@ -1153,5 +1153,84 @@ static inline void volk_8u_x2_encodeframepolar_8u_a_avx2(unsigned char* frame, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +#include + +static inline void volk_8u_x2_encodeframepolar_8u_rvv(unsigned char* frame, + unsigned char* temp, + unsigned int frame_size) +{ + unsigned int stage = log2_of_power_of_2(frame_size); + unsigned int frame_half = frame_size >> 1; + unsigned int num_branches = 1; + + while (stage) { + // encode stage + if (frame_half < 8) { + encodepolar_single_stage(frame, temp, num_branches, frame_half); + } else { + unsigned char *in = temp, *out = frame; + for (size_t branch = 0; branch < num_branches; ++branch) { + size_t n = frame_half; + for (size_t vl; n > 0; n -= vl, in += vl * 2, out += vl) { + vl = __riscv_vsetvl_e8m1(n); + vuint16m2_t vc = __riscv_vle16_v_u16m2((uint16_t*)in, vl); + vuint8m1_t v1 = __riscv_vnsrl(vc, 0, vl); + 
vuint8m1_t v2 = __riscv_vnsrl(vc, 8, vl); + __riscv_vse8(out, __riscv_vxor(v1, v2, vl), vl); + __riscv_vse8(out + frame_half, v2, vl); + } + out += frame_half; + } + } + memcpy(temp, frame, sizeof(unsigned char) * frame_size); + + // update all the parameters. + num_branches = num_branches << 1; + frame_half = frame_half >> 1; + --stage; + } +} +#endif /*LV_HAVE_RVV*/ + +#ifdef LV_HAVE_RVVSEG +#include + +static inline void volk_8u_x2_encodeframepolar_8u_rvvseg(unsigned char* frame, + unsigned char* temp, + unsigned int frame_size) +{ + unsigned int stage = log2_of_power_of_2(frame_size); + unsigned int frame_half = frame_size >> 1; + unsigned int num_branches = 1; + + while (stage) { + // encode stage + if (frame_half < 8) { + encodepolar_single_stage(frame, temp, num_branches, frame_half); + } else { + unsigned char *in = temp, *out = frame; + for (size_t branch = 0; branch < num_branches; ++branch) { + size_t n = frame_half; + for (size_t vl; n > 0; n -= vl, in += vl * 2, out += vl) { + vl = __riscv_vsetvl_e8m1(n); + vuint8m1x2_t vc = __riscv_vlseg2e8_v_u8m1x2(in, vl); + vuint8m1_t v1 = __riscv_vget_u8m1(vc, 0); + vuint8m1_t v2 = __riscv_vget_u8m1(vc, 1); + __riscv_vse8(out, __riscv_vxor(v1, v2, vl), vl); + __riscv_vse8(out + frame_half, v2, vl); + } + out += frame_half; + } + } + memcpy(temp, frame, sizeof(unsigned char) * frame_size); + + // update all the parameters. + num_branches = num_branches << 1; + frame_half = frame_half >> 1; + --stage; + } +} +#endif /*LV_HAVE_RVVSEG*/ #endif /* VOLK_KERNELS_VOLK_VOLK_8U_X2_ENCODEFRAMEPOLAR_8U_A_H_ */ diff --git a/kernels/volk/volk_8u_x3_encodepolar_8u_x2.h b/kernels/volk/volk_8u_x3_encodepolar_8u_x2.h index 4c45f757..e54befa4 100644 --- a/kernels/volk/volk_8u_x3_encodepolar_8u_x2.h +++ b/kernels/volk/volk_8u_x3_encodepolar_8u_x2.h @@ -169,4 +169,33 @@ volk_8u_x3_encodepolar_8u_x2_a_avx2(unsigned char* frame, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +static inline void volk_8u_x3_encodepolar_8u_x2_rvv(unsigned char* frame, + unsigned char* temp, + const unsigned char* frozen_bit_mask, + const unsigned char* frozen_bits, + const unsigned char* info_bits, + unsigned int frame_size) +{ + interleave_frozen_and_info_bits( + temp, frozen_bit_mask, frozen_bits, info_bits, frame_size); + volk_8u_x2_encodeframepolar_8u_rvv(frame, temp, frame_size); +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVVSEG +static inline void +volk_8u_x3_encodepolar_8u_x2_rvvseg(unsigned char* frame, + unsigned char* temp, + const unsigned char* frozen_bit_mask, + const unsigned char* frozen_bits, + const unsigned char* info_bits, + unsigned int frame_size) +{ + interleave_frozen_and_info_bits( + temp, frozen_bit_mask, frozen_bits, info_bits, frame_size); + volk_8u_x2_encodeframepolar_8u_rvvseg(frame, temp, frame_size); +} +#endif /* LV_HAVE_RVVSEG */ + #endif /* VOLK_KERNELS_VOLK_VOLK_8U_X3_ENCODEPOLAR_8U_X2_A_H_ */ diff --git a/kernels/volk/volk_8u_x3_encodepolarpuppet_8u.h b/kernels/volk/volk_8u_x3_encodepolarpuppet_8u.h index 496ca2e5..792168e0 100644 --- a/kernels/volk/volk_8u_x3_encodepolarpuppet_8u.h +++ b/kernels/volk/volk_8u_x3_encodepolarpuppet_8u.h @@ -156,5 +156,47 @@ volk_8u_x3_encodepolarpuppet_8u_a_avx2(unsigned char* frame, } #endif /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_RVV +static inline void volk_8u_x3_encodepolarpuppet_8u_rvv(unsigned char* frame, + unsigned char* frozen_bit_mask, + const unsigned char* frozen_bits, + const unsigned char* info_bits, + unsigned int frame_size) +{ + if (frame_size < 1) { + return; + } + + frame_size = 
next_lower_power_of_two(frame_size); + unsigned char* temp = (unsigned char*)volk_malloc(sizeof(unsigned char) * frame_size, + volk_get_alignment()); + adjust_frozen_mask(frozen_bit_mask, frame_size); + volk_8u_x3_encodepolar_8u_x2_rvv( + frame, temp, frozen_bit_mask, frozen_bits, info_bits, frame_size); + volk_free(temp); +} +#endif /* LV_HAVE_RVV */ + +#ifdef LV_HAVE_RVVSEG +static inline void +volk_8u_x3_encodepolarpuppet_8u_rvvseg(unsigned char* frame, + unsigned char* frozen_bit_mask, + const unsigned char* frozen_bits, + const unsigned char* info_bits, + unsigned int frame_size) +{ + if (frame_size < 1) { + return; + } + + frame_size = next_lower_power_of_two(frame_size); + unsigned char* temp = (unsigned char*)volk_malloc(sizeof(unsigned char) * frame_size, + volk_get_alignment()); + adjust_frozen_mask(frozen_bit_mask, frame_size); + volk_8u_x3_encodepolar_8u_x2_rvvseg( + frame, temp, frozen_bit_mask, frozen_bits, info_bits, frame_size); + volk_free(temp); +} +#endif /* LV_HAVE_RVVSEG */ #endif /* VOLK_KERNELS_VOLK_VOLK_8U_X3_ENCODEPOLARPUPPET_8U_A_H_ */ diff --git a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h index 9750b665..cb2db11a 100644 --- a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h +++ b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h @@ -63,11 +63,14 @@ static inline void renormalize(unsigned char* X) int i; unsigned char min = X[0]; - for (i = 0; i < NUMSTATES; i++) - if (min > X[i]) + for (i = 0; i < NUMSTATES; i++) { + if (min > X[i]) { min = X[i]; - for (i = 0; i < NUMSTATES; i++) + } + } + for (i = 0; i < NUMSTATES; i++) { X[i] -= min; + } } @@ -91,8 +94,9 @@ static inline void BFLY(int i, int PRECISIONSHIFT = 2; metricsum = 1; - for (j = 0; j < RATE; j++) + for (j = 0; j < RATE; j++) { metricsum += (Branchtab[i + j * NUMSTATES / 2] ^ syms[s * RATE + j]); + } metric = (metricsum >> METRICSHIFT) >> PRECISIONSHIFT; unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT); @@ -465,4 +469,210 @@ static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y, #endif /* LV_HAVE_GENERIC */ +#if LV_HAVE_RVV +#include + +static inline void volk_8u_x4_conv_k7_r2_8u_rvv(unsigned char* Y, + unsigned char* X, + unsigned char* syms, + unsigned char* dec, + unsigned int framebits, + unsigned int excess, + unsigned char* Branchtab) +{ + size_t vl = 256 / 8; + + size_t n = framebits + excess; + + if (__riscv_vlenb() == 128 / 8) { + vuint8m2_t vX0 = __riscv_vle8_v_u8m2(X, vl), + vX1 = __riscv_vle8_v_u8m2(X + vl, vl); + vuint8m2_t vY0 = __riscv_vle8_v_u8m2(Y, vl), + vY1 = __riscv_vle8_v_u8m2(Y + vl, vl); + vuint8m2_t vB0 = __riscv_vle8_v_u8m2(Branchtab, vl); + vuint8m2_t vB1 = __riscv_vle8_v_u8m2(Branchtab + vl, vl); + vuint8m2_t v63 = __riscv_vmv_v_x_u8m2(63, vl); + + for (size_t i = 0; i < n; ++i) { + // Butterfly + vuint8m2_t va0 = __riscv_vxor(vB0, syms[2 * i + 0], vl); + vuint8m2_t va1 = __riscv_vxor(vB1, syms[2 * i + 1], vl); + vuint8m2_t va = __riscv_vaaddu(va0, va1, 0, vl); + va = __riscv_vreinterpret_u8m2( + __riscv_vsrl(__riscv_vreinterpret_u16m2(va), 2, vl / 2)); + va = __riscv_vand(va, v63, vl); + vuint8m2_t vb = __riscv_vssubu(v63, va, vl); + vuint8m2_t vX0a = __riscv_vsaddu(vX0, va, vl); + vuint8m2_t vX1b = __riscv_vsaddu(vX1, vb, vl); + vuint8m2_t vX0b = __riscv_vsaddu(vX0, vb, vl); + vuint8m2_t vX1a = __riscv_vsaddu(vX1, va, vl); + vY0 = __riscv_vminu(vX1b, vX0a, vl); + vY1 = __riscv_vminu(vX1a, vX0b, vl); + + vuint16m4_t vX1ba = + __riscv_vwmaccu(__riscv_vwaddu_vv(vX1b, vX1a, vl), 0xFF, vX1a, vl); + vX1b = 
__riscv_vget_u8m2(__riscv_vreinterpret_u8m4(vX1ba), 0); + vX1a = __riscv_vget_u8m2(__riscv_vreinterpret_u8m4(vX1ba), 1); + + vuint16m4_t vm = + __riscv_vwmaccu(__riscv_vwaddu_vv(vY0, vY1, vl), 0xFF, vY1, vl); + vY0 = __riscv_vget_u8m2(__riscv_vreinterpret_u8m4(vm), 0); + vY1 = __riscv_vget_u8m2(__riscv_vreinterpret_u8m4(vm), 1); + + __riscv_vsm(&dec[8 * i + 0], __riscv_vmseq(vY0, vX1b, vl), vl); + __riscv_vsm(&dec[8 * i + 4], __riscv_vmseq(vY1, vX1a, vl), vl); + + // Renormalize + vuint8m2_t vmin = __riscv_vminu(vY0, vY1, vl); + vmin = __riscv_vlmul_ext_u8m2( + __riscv_vredminu(vmin, __riscv_vlmul_trunc_u8m1(vmin), vl)); + vmin = __riscv_vrgather(vmin, 0, vl); + vY0 = __riscv_vsub(vY0, vmin, vl); + vY1 = __riscv_vsub(vY1, vmin, vl); + + vuint8m2_t tmp; // Swap pointers to old and new metrics + tmp = vX0; + vX0 = vY0; + vY0 = tmp; + tmp = vX1; + vX1 = vY1; + vY1 = tmp; + } + if (n & 1) { + __riscv_vse8(X, vY0, vl); + __riscv_vse8(X + vl, vY1, vl); + __riscv_vse8(Y, vX0, vl); + __riscv_vse8(Y + vl, vX1, vl); + } else { + __riscv_vse8(X, vX0, vl); + __riscv_vse8(X + vl, vX1, vl); + __riscv_vse8(Y, vY0, vl); + __riscv_vse8(Y + vl, vY1, vl); + } + } else if (__riscv_vlenb() == 256 / 8) { + vuint8m1_t vX0 = __riscv_vle8_v_u8m1(X, vl), + vX1 = __riscv_vle8_v_u8m1(X + vl, vl); + vuint8m1_t vY0 = __riscv_vle8_v_u8m1(Y, vl), + vY1 = __riscv_vle8_v_u8m1(Y + vl, vl); + vuint8m1_t vB0 = __riscv_vle8_v_u8m1(Branchtab, vl); + vuint8m1_t vB1 = __riscv_vle8_v_u8m1(Branchtab + vl, vl); + vuint8m1_t v63 = __riscv_vmv_v_x_u8m1(63, vl); + + for (size_t i = 0; i < n; ++i) { + // Butterfly + vuint8m1_t va0 = __riscv_vxor(vB0, syms[2 * i + 0], vl); + vuint8m1_t va1 = __riscv_vxor(vB1, syms[2 * i + 1], vl); + vuint8m1_t va = __riscv_vaaddu(va0, va1, 0, vl); + va = __riscv_vreinterpret_u8m1( + __riscv_vsrl(__riscv_vreinterpret_u16m1(va), 2, vl / 2)); + va = __riscv_vand(va, v63, vl); + vuint8m1_t vb = __riscv_vssubu(v63, va, vl); + vuint8m1_t vX0a = __riscv_vsaddu(vX0, va, vl); + vuint8m1_t vX1b = __riscv_vsaddu(vX1, vb, vl); + vuint8m1_t vX0b = __riscv_vsaddu(vX0, vb, vl); + vuint8m1_t vX1a = __riscv_vsaddu(vX1, va, vl); + vY0 = __riscv_vminu(vX1b, vX0a, vl); + vY1 = __riscv_vminu(vX1a, vX0b, vl); + + vuint16m2_t vX1ba = + __riscv_vwmaccu(__riscv_vwaddu_vv(vX1b, vX1a, vl), 0xFF, vX1a, vl); + vX1b = __riscv_vget_u8m1(__riscv_vreinterpret_u8m2(vX1ba), 0); + vX1a = __riscv_vget_u8m1(__riscv_vreinterpret_u8m2(vX1ba), 1); + + vuint16m2_t vm = + __riscv_vwmaccu(__riscv_vwaddu_vv(vY0, vY1, vl), 0xFF, vY1, vl); + vY0 = __riscv_vget_u8m1(__riscv_vreinterpret_u8m2(vm), 0); + vY1 = __riscv_vget_u8m1(__riscv_vreinterpret_u8m2(vm), 1); + + __riscv_vsm(&dec[8 * i + 0], __riscv_vmseq(vY0, vX1b, vl), vl); + __riscv_vsm(&dec[8 * i + 4], __riscv_vmseq(vY1, vX1a, vl), vl); + + // Renormalize + vuint8m1_t vmin = __riscv_vminu(vY0, vY1, vl); + vmin = __riscv_vrgather(__riscv_vredminu(vmin, vmin, vl), 0, vl); + vY0 = __riscv_vsub(vY0, vmin, vl); + vY1 = __riscv_vsub(vY1, vmin, vl); + + vuint8m1_t tmp; // Swap pointers to old and new metrics + tmp = vX0; + vX0 = vY0; + vY0 = tmp; + tmp = vX1; + vX1 = vY1; + vY1 = tmp; + } + if (n & 1) { + __riscv_vse8(X, vY0, vl); + __riscv_vse8(X + vl, vY1, vl); + __riscv_vse8(Y, vX0, vl); + __riscv_vse8(Y + vl, vX1, vl); + } else { + __riscv_vse8(X, vX0, vl); + __riscv_vse8(X + vl, vX1, vl); + __riscv_vse8(Y, vY0, vl); + __riscv_vse8(Y + vl, vY1, vl); + } + } else { + vuint8mf2_t vX0 = __riscv_vle8_v_u8mf2(X, vl), + vX1 = __riscv_vle8_v_u8mf2(X + vl, vl); + vuint8mf2_t vY0 = __riscv_vle8_v_u8mf2(Y, 
vl), + vY1 = __riscv_vle8_v_u8mf2(Y + vl, vl); + vuint8mf2_t vB0 = __riscv_vle8_v_u8mf2(Branchtab, vl); + vuint8mf2_t vB1 = __riscv_vle8_v_u8mf2(Branchtab + vl, vl); + vuint8mf2_t v63 = __riscv_vmv_v_x_u8mf2(63, vl); + + for (size_t i = 0; i < n; ++i) { + // Butterfly + vuint8mf2_t va0 = __riscv_vxor(vB0, syms[2 * i + 0], vl); + vuint8mf2_t va1 = __riscv_vxor(vB1, syms[2 * i + 1], vl); + vuint8mf2_t va = __riscv_vaaddu(va0, va1, 0, vl); + va = __riscv_vreinterpret_u8mf2( + __riscv_vsrl(__riscv_vreinterpret_u16mf2(va), 2, vl / 2)); + va = __riscv_vand(va, v63, vl); + vuint8mf2_t vb = __riscv_vssubu(v63, va, vl); + vuint8mf2_t vX0a = __riscv_vsaddu(vX0, va, vl); + vuint8mf2_t vX1b = __riscv_vsaddu(vX1, vb, vl); + vuint8mf2_t vX0b = __riscv_vsaddu(vX0, vb, vl); + vuint8mf2_t vX1a = __riscv_vsaddu(vX1, va, vl); + vY0 = __riscv_vminu(vX1b, vX0a, vl); + vY1 = __riscv_vminu(vX1a, vX0b, vl); + + vuint8m1_t vX1ba = __riscv_vreinterpret_u8m1( + __riscv_vwmaccu(__riscv_vwaddu_vv(vX1b, vX1a, vl), 0xFF, vX1a, vl)); + vuint8m1_t vY01 = __riscv_vreinterpret_u8m1( + __riscv_vwmaccu(__riscv_vwaddu_vv(vY0, vY1, vl), 0xFF, vY1, vl)); + + __riscv_vsm(&dec[8 * i + 0], __riscv_vmseq(vY01, vX1ba, vl * 2), vl * 2); + + // Renormalize + vuint8m1_t vmin = + __riscv_vrgather(__riscv_vredminu(vY01, vY01, vl * 2), 0, vl * 2); + vY01 = __riscv_vsub(vY01, vmin, vl * 2); + + vY0 = __riscv_vlmul_trunc_u8mf2(vY01); + vY1 = __riscv_vlmul_trunc_u8mf2(__riscv_vslidedown(vY01, vl, vl)); + + vuint8mf2_t tmp; // Swap pointers to old and new metrics + tmp = vX0; + vX0 = vY0; + vY0 = tmp; + tmp = vX1; + vX1 = vY1; + vY1 = tmp; + } + if (n & 1) { + __riscv_vse8(X, vY0, vl); + __riscv_vse8(X + vl, vY1, vl); + __riscv_vse8(Y, vX0, vl); + __riscv_vse8(Y + vl, vX1, vl); + } else { + __riscv_vse8(X, vX0, vl); + __riscv_vse8(X + vl, vX1, vl); + __riscv_vse8(Y, vY0, vl); + __riscv_vse8(Y + vl, vY1, vl); + } + } +} +#endif /*LV_HAVE_RVV*/ + #endif /*INCLUDED_volk_8u_x4_conv_k7_r2_8u_H*/ diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 2c160b2f..588db44f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -93,12 +93,28 @@ execute_process( OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE) +try_compile( + HAVE_RVV_INTRINSICS + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/cmake/Checks/check-rvv-intrinsics.c +) +if(HAVE_RVV_INTRINSICS) + message(STATUS "Checking RVV intrinsics - found") +else() + message(STATUS "Checking RVV intrinsics - not found") +endif() + macro(check_arch arch_name) set(flags ${ARGN}) set(have_${arch_name} TRUE) + + string(SUBSTRING "${arch_name}" 0 2 arch_prefix) foreach(flag ${flags}) if(MSVC AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE")) # SSE/SSE2 is supported in MSVC since VS 2005 but flag not available when compiling 64-bit so do not check + elseif("${arch_prefix}" STREQUAL "rv" AND NOT HAVE_RVV_INTRINSICS) + message(STATUS "Skipping ${arch_name} due to missing RVV intrinsics support") + set(have_${arch_name} FALSE) else() include(CheckCXXCompilerFlag) set(have_flag have${flag}) diff --git a/tmpl/volk_cpu.tmpl.c b/tmpl/volk_cpu.tmpl.c index a4a06b0f..2cf2fa34 100644 --- a/tmpl/volk_cpu.tmpl.c +++ b/tmpl/volk_cpu.tmpl.c @@ -49,7 +49,7 @@ static int i_can_has_${arch.name} (void) { #if defined(CPU_FEATURES_ARCH_MIPS) if (GetMipsInfo().features.${check} == 0){ return 0; } #endif - %elif "riscv" in arch.name: + %elif "riscv" in arch.name or arch.name[:2] == "rv": #if defined(CPU_FEATURES_ARCH_RISCV) if (GetRiscvInfo().features.${check} == 0){ return 0; } #endif
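
Note on the parity rewrite in volk_8u_conv_k7_r2puppet_8u.h above: the patch drops the 256-entry Partab lookup and its per-call initialization loop in favour of a branchless xor-fold, so the puppet kernels no longer carry the static table. Below is a minimal standalone sketch (a hypothetical test harness, not part of this patch) that checks the folded parity against GCC/Clang's __builtin_parity; the helper name parity_fold is an assumption for illustration only.

#include <assert.h>

/* Same xor-fold as the patched parity(): each step folds the upper half of
 * the remaining bits into the lower half, so bit 0 ends up holding the
 * parity of the whole word. */
static int parity_fold(int x)
{
    x ^= x >> 16;
    x ^= x >> 8;
    x ^= x >> 4;
    x ^= x >> 2;
    x ^= x >> 1;
    return x & 1;
}

int main(void)
{
    for (int i = 0; i < (1 << 16); i++)
        assert(parity_fold(i) == (int)__builtin_parity((unsigned)i));
    return 0;
}

Since the branch-table setup masks its argument with the 7-bit polynomials 79 and 109 before calling parity(), only the low folds actually contribute there, but keeping the full fold makes the helper correct for any non-negative int.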