Add support for saturating add/subtract MMX instructions

tautschnig · tautschnig · commit 98f43f73c0f8 · 2022-02-07T20:56:48.000Z
With the newly added saturating addition/subtraction it is possible to
support MMX instructions performing saturating arithmetic over vectors.
diff --git a/regression/cbmc/SIMD1/main.c b/regression/cbmc/SIMD1/main.c
@@ -1,6 +1,11 @@
 #include <assert.h>
+#include <limits.h>
+
 #ifdef _MSC_VER
 #  include <intrin.h>
+#  ifdef _WIN64
+#    define _mm_extract_pi16(a, b) _mm_extract_epi16(a, b)
+#  endif
 #else
 #  include <immintrin.h>
 #endif
@@ -10,5 +15,44 @@ int main()
   __m128i values = _mm_setr_epi32(0x1234, 0x2345, 0x3456, 0x4567);
   int val1 = _mm_extract_epi32(values, 0);
   assert(val1 == 0x1234);
+
+#ifndef _WIN64
+  __m64 a = _mm_setr_pi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX);
+  __m64 b = _mm_set_pi16(1, 1, 10, 1);
+  __m64 result = _mm_subs_pi16(a, b);
+#else
+  __m128i a = _mm_setr_epi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX, 0, 0, 0, 0);
+  __m128i b = _mm_set_epi16(0, 0, 0, 0, 1, 1, 10, 1);
+  __m128i result = _mm_subs_epi16(a, b);
+#endif
+  short s1 = _mm_extract_pi16(result, 0);
+  assert(s1 == SHRT_MIN);
+  short s2 = _mm_extract_pi16(result, 1);
+  assert(s2 == 0);
+  short s3 = _mm_extract_pi16(result, 2);
+  assert(s3 == SHRT_MIN);
+  short s4 = _mm_extract_pi16(result, 3);
+  assert(s4 == SHRT_MAX - 1);
+
+#ifndef _WIN64
+  result = _mm_adds_pi16(a, b);
+#else
+  result = _mm_adds_epi16(a, b);
+#endif
+  s1 = _mm_extract_pi16(result, 0);
+  assert(s1 == SHRT_MIN + 1);
+  s2 = _mm_extract_pi16(result, 1);
+  assert(s2 == 20);
+  s3 = _mm_extract_pi16(result, 2);
+  assert(s3 == SHRT_MIN + 2);
+  s4 = _mm_extract_pi16(result, 3);
+  assert(s4 == SHRT_MAX);
+
+  __m128i x = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
+  __m128i y = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
+  __m128i result128 = _mm_subs_epu16(x, y);
+  short s = _mm_extract_epi16(result128, 0);
+  assert(s == 0);
+
   return 0;
 }
diff --git a/src/ansi-c/library/gcc.c b/src/ansi-c/library/gcc.c
@@ -192,6 +192,24 @@ __CPROVER_HIDE:;
   return size <= sizeof(__CPROVER_size_t);
 }
 
+/* FUNCTION: __builtin_ia32_vec_ext_v4hi */
+
+typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
+
+short __builtin_ia32_vec_ext_v4hi(__gcc_v4hi vec, int offset)
+{ /* returns the short at index `offset`, viewing vec as a short array */
+  return *((short *)&vec + offset);
+}
+
+/* FUNCTION: __builtin_ia32_vec_ext_v8hi */
+
+typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
+
+short __builtin_ia32_vec_ext_v8hi(__gcc_v8hi vec, int offset)
+{ /* returns the short at index `offset`, viewing vec as a short array */
+  return *((short *)&vec + offset);
+}
+
 /* FUNCTION: __builtin_ia32_vec_ext_v4si */
 
 typedef int __gcc_v4si __attribute__((__vector_size__(16)));
@@ -227,3 +245,70 @@ float __builtin_ia32_vec_ext_v4sf(__gcc_v4sf vec, int offset)
 {
   return *((float *)&vec + offset);
 }
+
+/* FUNCTION: __builtin_ia32_psubsw128 */
+
+#ifndef LIBRARY_CHECK
+typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
+#else /* LIBRARY_CHECK: only a prototype-less declaration is provided */
+__gcc_v8hi __CPROVER_saturating_minus();
+#endif
+
+__gcc_v8hi __builtin_ia32_psubsw128(__gcc_v8hi a, __gcc_v8hi b)
+{ /* saturating a - b on vectors of short */
+  return __CPROVER_saturating_minus(a, b);
+}
+
+/* FUNCTION: __builtin_ia32_psubusw128 */
+
+#ifndef LIBRARY_CHECK
+typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
+#endif
+
+__gcc_v8hi __builtin_ia32_psubusw128(__gcc_v8hi a, __gcc_v8hi b)
+{ /* subtract on unsigned short elements, then cast back to __gcc_v8hi */
+  typedef unsigned short v8hi_u __attribute__((__vector_size__(16)));
+  return (__gcc_v8hi)__CPROVER_saturating_minus((v8hi_u)a, (v8hi_u)b);
+}
+
+/* FUNCTION: __builtin_ia32_paddsw */
+
+#ifndef LIBRARY_CHECK
+typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
+#else /* LIBRARY_CHECK: only a prototype-less declaration is provided */
+__gcc_v4hi __CPROVER_saturating_plus();
+#endif
+
+__gcc_v4hi __builtin_ia32_paddsw(__gcc_v4hi a, __gcc_v4hi b)
+{ /* saturating a + b on vectors of short */
+  return __CPROVER_saturating_plus(a, b);
+}
+
+/* FUNCTION: __builtin_ia32_psubsw */
+
+#ifndef LIBRARY_CHECK
+typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
+#else /* LIBRARY_CHECK: route the call to a v4hi-typed prototype */
+__gcc_v4hi __CPROVER_saturating_minus_v4hi(__gcc_v4hi, __gcc_v4hi);
+#  define __CPROVER_saturating_minus __CPROVER_saturating_minus_v4hi
+#endif
+
+__gcc_v4hi __builtin_ia32_psubsw(__gcc_v4hi a, __gcc_v4hi b)
+{ /* saturating a - b on vectors of short */
+  return __CPROVER_saturating_minus(a, b);
+}
+
+#ifdef LIBRARY_CHECK
+#  undef __CPROVER_saturating_minus /* remove the redirect defined above */
+#endif
+
+/* FUNCTION: __builtin_ia32_vec_init_v4hi */
+
+#ifndef LIBRARY_CHECK
+typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
+#endif
+
+__gcc_v4hi __builtin_ia32_vec_init_v4hi(short e0, short e1, short e2, short e3)
+{ /* e0 becomes element 0 and e3 element 3 of the result */
+  return (__gcc_v4hi){e0, e1, e2, e3};
+}
diff --git a/src/ansi-c/library/intrin.c b/src/ansi-c/library/intrin.c
@@ -371,6 +371,78 @@ inline __m128i _mm_setr_epi32(int e3, int e2, int e1, int e0)
 }
 #endif
 
+/* FUNCTION: _mm_set_epi16 */
+
+#ifdef _MSC_VER
+#  ifndef __CPROVER_INTRIN_H_INCLUDED
+#    include <intrin.h>
+#    define __CPROVER_INTRIN_H_INCLUDED
+#  endif
+
+inline __m128i _mm_set_epi16(
+  short e7,
+  short e6,
+  short e5,
+  short e4,
+  short e3,
+  short e2,
+  short e1,
+  short e0)
+{ /* arguments run from the highest element (e7) down to e0 */
+  return (__m128i){.m128i_i16 = {e0, e1, e2, e3, e4, e5, e6, e7}};
+}
+#endif
+
+/* FUNCTION: _mm_setr_epi16 */
+
+#ifdef _MSC_VER
+#  ifndef __CPROVER_INTRIN_H_INCLUDED
+#    include <intrin.h>
+#    define __CPROVER_INTRIN_H_INCLUDED
+#  endif
+
+inline __m128i _mm_setr_epi16(
+  short e7,
+  short e6,
+  short e5,
+  short e4,
+  short e3,
+  short e2,
+  short e1,
+  short e0)
+{ /* reversed order: the first argument (e7) becomes element 0 */
+  return (__m128i){.m128i_i16 = {e7, e6, e5, e4, e3, e2, e1, e0}};
+}
+#endif
+
+/* FUNCTION: _mm_set_pi16 */
+
+#ifdef _MSC_VER
+#  ifndef __CPROVER_INTRIN_H_INCLUDED
+#    include <intrin.h>
+#    define __CPROVER_INTRIN_H_INCLUDED
+#  endif
+
+inline __m64 _mm_set_pi16(short e3, short e2, short e1, short e0)
+{ /* arguments run from the highest element (e3) down to e0 */
+  return (__m64){.m64_i16 = {e0, e1, e2, e3}};
+}
+#endif
+
+/* FUNCTION: _mm_setr_pi16 */
+
+#ifdef _MSC_VER
+#  ifndef __CPROVER_INTRIN_H_INCLUDED
+#    include <intrin.h>
+#    define __CPROVER_INTRIN_H_INCLUDED
+#  endif
+
+inline __m64 _mm_setr_pi16(short e3, short e2, short e1, short e0)
+{ /* reversed order: the first argument (e3) becomes element 0 */
+  return (__m64){.m64_i16 = {e3, e2, e1, e0}};
+}
+#endif
+
 /* FUNCTION: _mm_extract_epi32 */
 
 #ifdef _MSC_VER
@@ -384,3 +456,17 @@ inline int _mm_extract_epi32(__m128i a, const int imm8)
   return a.m128i_i32[imm8];
 }
 #endif
+
+/* FUNCTION: _mm_extract_pi16 */
+
+#ifdef _MSC_VER
+#  ifndef __CPROVER_INTRIN_H_INCLUDED
+#    include <intrin.h>
+#    define __CPROVER_INTRIN_H_INCLUDED
+#  endif
+
+inline int _mm_extract_pi16(__m64 a, const int imm8)
+{ /* selects the 16-bit element of a at index imm8 */
+  return (unsigned short)a.m64_i16[imm8]; /* zero-extend, as PEXTRW does */
+}
+#endif
diff --git a/src/goto-programs/remove_vector.cpp b/src/goto-programs/remove_vector.cpp
@@ -28,7 +28,8 @@ static bool have_to_remove_vector(const exprt &expr)
       expr.id() == ID_plus || expr.id() == ID_minus || expr.id() == ID_mult ||
       expr.id() == ID_div || expr.id() == ID_mod || expr.id() == ID_bitxor ||
       expr.id() == ID_bitand || expr.id() == ID_bitor || expr.id() == ID_shl ||
-      expr.id() == ID_lshr || expr.id() == ID_ashr)
+      expr.id() == ID_lshr || expr.id() == ID_ashr ||
+      expr.id() == ID_saturating_minus || expr.id() == ID_saturating_plus)
     {
       return true;
     }
@@ -102,7 +103,8 @@ static void remove_vector(exprt &expr)
       expr.id() == ID_plus || expr.id() == ID_minus || expr.id() == ID_mult ||
       expr.id() == ID_div || expr.id() == ID_mod || expr.id() == ID_bitxor ||
       expr.id() == ID_bitand || expr.id() == ID_bitor || expr.id() == ID_shl ||
-      expr.id() == ID_lshr || expr.id() == ID_ashr)
+      expr.id() == ID_lshr || expr.id() == ID_ashr ||
+      expr.id() == ID_saturating_minus || expr.id() == ID_saturating_plus)
     {
       // FIXME plus, mult, bitxor, bitand and bitor are defined as n-ary
       //      operations rather than binary. This code assumes that they
@@ -243,6 +245,32 @@ static void remove_vector(exprt &expr)
         expr = array_exprt(exprt::operandst(dimension, casted_op), array_type);
         expr.add_source_location() = std::move(source_location);
       }
+      else if(
+        expr.type().id() == ID_vector &&
+        to_vector_type(expr.type()).size() == to_array_type(op.type()).size())
+      {
+        // do component-wise typecast:
+        // (vector-type) x -> array((vector-sub-type)x[0], ...)
+        remove_vector(expr.type());
+        const array_typet &array_type = to_array_type(expr.type());
+        const std::size_t dimension =
+          numeric_cast_v<std::size_t>(to_constant_expr(array_type.size()));
+        const typet subtype = array_type.element_type();
+
+        exprt::operandst elements;
+        elements.reserve(dimension);
+
+        for(std::size_t i = 0; i < dimension; i++)
+        {
+          exprt index = from_integer(i, array_type.size().type());
+          elements.push_back(
+            typecast_exprt{index_exprt{op, std::move(index)}, subtype});
+        }
+
+        array_exprt array_expr(std::move(elements), array_type);
+        array_expr.add_source_location() = expr.source_location();
+        expr.swap(array_expr);
+      }
     }
   }