diff --git a/kernels/volk/volk_8i_x2_saturated_sum_8i.h b/kernels/volk/volk_8i_x2_saturated_sum_8i.h
new file mode 100644
index 000000000..848b80033
--- /dev/null
+++ b/kernels/volk/volk_8i_x2_saturated_sum_8i.h
@@ -0,0 +1,277 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2015 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*!
+ * \page volk_8i_x2_saturated_sum_8i
+ *
+ * \b Overview
+ *
+ * Carry out a saturated addition on a vectors of signed 8-bit integers.
+ *
+ * Dispatcher Prototype
+ * \code
+ * void volk_8i_x2_saturated_sum_8i(int8_t* outVector, const int8_t* inVectorA, const int8_t* inVectorB, const unsigned int num_vals)
+ * \endcode
+ *
+ * \b Inputs
+ * \li inVectorA: The first input vector of 8-bit integers.
+ * \li inVectorB: The second input vector of 8-bit integers.
+ * \li num_vals: The number of data points.
+ *
+ * \b Outputs
+ * \li outVector: The resulting output of 8-bit integers.
+ *
+ * \b Example
+ *
+ * The follow example adds some vectors such that the result will eventually saturate
+ *
+ * \code
+ * int N = 100;
+ * unsigned int alignment = volk_get_alignment();
+ * int8_t* vecA = (int8_t*)volk_malloc(sizeof(int8_t)*N, alignment);
+ * int8_t* vecB = (int8_t*)volk_malloc(sizeof(int8_t)*N, alignment);
+ * int8_t* out = (int8_t*)volk_malloc(sizeof(int8_t)*N, alignment);
+ *
+ * for(unsigned int ii = 0; ii < N; ++ii){
+ * vecA[ii] = ii;
+ * vecB[ii] = 100+ii;
+ * }
+ *
+ * volk_8i_x2_saturated_sum_8i(out, vecA, vecB, N);
+ *
+ * for(unsigned int ii = 0; ii < N; ++ii){
+ * printf("out[%u] = %d\n", ii, out[ii]);
+ * }
+ *
+ * volk_free(vecA);
+ * volk_free(vecB);
+ * volk_free(out);
+ * \endcode
+ */
+
+#ifndef INCLUDED_volk_8i_x2_saturated_sum_8i_u_H
+#define INCLUDED_volk_8i_x2_saturated_sum_8i_u_H
+
+#include
+#include
+
+#ifdef LV_HAVE_GENERIC
+
+static inline void
+volk_8i_x2_saturated_sum_8i_generic(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < num_vals; i++){
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#ifdef LV_HAVE_SSE2
+
+#include
+
+static inline void
+volk_8i_x2_saturated_sum_8i_u_sse2(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ const unsigned int VEC_SIZE = 16;
+ const unsigned int vecNum = num_vals / VEC_SIZE;
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < vecNum; i++) {
+ __m128i a = _mm_loadu_si128((const __m128i*)(inVectorA + VEC_SIZE*i));
+ __m128i b = _mm_loadu_si128((const __m128i*)(inVectorB + VEC_SIZE*i));
+ __m128i result = _mm_adds_epi8(b, a);
+ _mm_storeu_si128((__m128i*)(outVector + VEC_SIZE*i), result);
+ }
+
+ i = VEC_SIZE*vecNum;
+ for(; i < num_vals; i++) {
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+
+#endif /* LV_HAVE_SSE2 for unaligned */
+
+
+#ifdef LV_HAVE_AVX2
+
+#include
+
+static inline void
+volk_8i_x2_saturated_sum_8i_u_avx2(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ const unsigned int VEC_SIZE = 32;
+ const unsigned int vecNum = num_vals / VEC_SIZE;
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < vecNum; i++) {
+ __m256i a = _mm256_loadu_si256((const __m256i*)(inVectorA + VEC_SIZE*i));
+ __m256i b = _mm256_loadu_si256((const __m256i*)(inVectorB + VEC_SIZE*i));
+ __m256i result = _mm256_adds_epi8(b, a);
+ _mm256_storeu_si256((__m256i*)(outVector + VEC_SIZE*i), result);
+ }
+
+ i = VEC_SIZE*vecNum;
+ for(; i < num_vals; i++) {
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+
+#endif /* LV_HAVE_AVX2 for unaligned */
+
+#endif /* INCLUDED_VOLK_8s_SATURATED_SUM_8s_UNALIGNED8_H */
+
+
+#ifndef INCLUDED_volk_8i_x2_saturated_sum_8i_a_H
+#define INCLUDED_volk_8i_x2_saturated_sum_8i_a_H
+
+#ifdef LV_HAVE_SSE2
+
+#include
+
+static inline void
+volk_8i_x2_saturated_sum_8i_a_sse2(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ const unsigned int VEC_SIZE = 16;
+ const unsigned int vecNum = num_vals / VEC_SIZE;
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < vecNum; i++) {
+ __m128i a = _mm_load_si128((const __m128i*)(inVectorA + VEC_SIZE*i));
+ __m128i b = _mm_load_si128((const __m128i*)(inVectorB + VEC_SIZE*i));
+ __m128i result = _mm_adds_epi8(b, a);
+ _mm_store_si128((__m128i*)(outVector + VEC_SIZE*i), result);
+ }
+
+ i = VEC_SIZE*vecNum;
+ for(; i < num_vals; i++) {
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+
+#endif /* LV_HAVE_SSE2 for aligned */
+
+
+#ifdef LV_HAVE_AVX2
+
+#include
+
+static inline void
+volk_8i_x2_saturated_sum_8i_a_avx2(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ const unsigned int VEC_SIZE = 32;
+ const unsigned int vecNum = num_vals / VEC_SIZE;
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < vecNum; i++) {
+ __m256i a = _mm256_load_si256((const __m256i*)(inVectorA + VEC_SIZE*i));
+ __m256i b = _mm256_load_si256((const __m256i*)(inVectorB + VEC_SIZE*i));
+ __m256i result = _mm256_adds_epi8(b, a);
+ _mm256_store_si256((__m256i*)(outVector + VEC_SIZE*i), result);
+ }
+
+ i = VEC_SIZE*vecNum;
+ for(; i < num_vals; i++) {
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+
+#endif /* LV_HAVE_AVX2 for aligned */
+
+
+#ifdef LV_HAVE_NEON
+
+#include
+
+static inline void
+volk_8i_x2_saturated_sum_8i_a_neon(int8_t* outVector,
+ const int8_t* inVectorA, const int8_t* inVectorB,
+ const unsigned int num_vals)
+{
+ const unsigned int VEC_SIZE = 16;
+ const unsigned int vecNum = num_vals / VEC_SIZE;
+ unsigned int i = 0;
+ int16_t tmpPlus; // Need to move to more bits to workaround possible overflow
+
+ for(; i < vecNum; i++) {
+ int8x16_t a = vld1q_s8((const int8x16_t*)(inVectorA + VEC_SIZE*i));
+ int8x16_t b = vld1q_s8((const int8x16_t*)(inVectorB + VEC_SIZE*i));
+ vst1q_s8((const int8x16_t*)(outVector + VEC_SIZE*i), vqaddq_s8(b, a));
+ }
+
+ i = VEC_SIZE*vecNum;
+ for(; i < num_vals; i++) {
+ tmpPlus = inVectorB[i] + inVectorA[i];
+ // Saturate to avoid overflow
+ tmpPlus = (tmpPlus > 127) ? 127 : tmpPlus;
+ tmpPlus = (tmpPlus < -128) ? -128 : tmpPlus;
+
+ outVector[i] = ((int8_t)tmpPlus);
+ }
+}
+
+#endif /* LV_HAVE_NEON for aligned */
+
+#endif /* INCLUDED_VOLK_8s_SATURATED_SUM_8s_ALIGNED8_H */
diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h
index 23c0c45a4..76125dfad 100644
--- a/lib/kernel_tests.h
+++ b/lib/kernel_tests.h
@@ -115,6 +115,7 @@ std::vector init_test_list(volk_test_params_t test_params)
(VOLK_INIT_TEST(volk_8ic_x2_s32f_multiply_conjugate_32fc, test_params))
(VOLK_INIT_TEST(volk_8i_convert_16i, test_params))
(VOLK_INIT_TEST(volk_8i_s32f_convert_32f, test_params))
+ (VOLK_INIT_TEST(volk_8i_x2_saturated_sum_8i, test_params))
(VOLK_INIT_TEST(volk_32fc_s32fc_multiply_32fc, test_params))
(VOLK_INIT_TEST(volk_32f_s32f_multiply_32f, test_params))
(VOLK_INIT_TEST(volk_32f_binary_slicer_32i, test_params))