Skip to content

Commit 98f43f7

Browse files
committed
Add support for saturating add/subtract MMX instructions
With the newly added saturating addition/subtraction it is possible to support MMX instructions performing saturating arithmetic over vectors.
1 parent 990c3df commit 98f43f7

File tree

4 files changed

+245
-2
lines changed

4 files changed

+245
-2
lines changed

regression/cbmc/SIMD1/main.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#include <assert.h>
2+
#include <limits.h>
3+
24
#ifdef _MSC_VER
35
# include <intrin.h>
6+
# ifdef _WIN64
7+
# define _mm_extract_pi16(a, b) _mm_extract_epi16(a, b)
8+
# endif
49
#else
510
# include <immintrin.h>
611
#endif
@@ -10,5 +15,44 @@ int main()
1015
__m128i values = _mm_setr_epi32(0x1234, 0x2345, 0x3456, 0x4567);
1116
int val1 = _mm_extract_epi32(values, 0);
1217
assert(val1 == 0x1234);
18+
19+
#ifndef _WIN64
20+
__m64 a = _mm_setr_pi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX);
21+
__m64 b = _mm_set_pi16(1, 1, 10, 1);
22+
__m64 result = _mm_subs_pi16(a, b);
23+
#else
24+
__m128i a = _mm_setr_epi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX, 0, 0, 0, 0);
25+
__m128i b = _mm_set_epi16(0, 0, 0, 0, 1, 1, 10, 1);
26+
__m128i result = _mm_subs_epi16(a, b);
27+
#endif
28+
short s1 = _mm_extract_pi16(result, 0);
29+
assert(s1 == SHRT_MIN);
30+
short s2 = _mm_extract_pi16(result, 1);
31+
assert(s2 == 0);
32+
short s3 = _mm_extract_pi16(result, 2);
33+
assert(s3 == SHRT_MIN);
34+
short s4 = _mm_extract_pi16(result, 3);
35+
assert(s4 == SHRT_MAX - 1);
36+
37+
#ifndef _WIN64
38+
result = _mm_adds_pi16(a, b);
39+
#else
40+
result = _mm_adds_epi16(a, b);
41+
#endif
42+
s1 = _mm_extract_pi16(result, 0);
43+
assert(s1 == SHRT_MIN + 1);
44+
s2 = _mm_extract_pi16(result, 1);
45+
assert(s2 == 20);
46+
s3 = _mm_extract_pi16(result, 2);
47+
assert(s3 == SHRT_MIN + 2);
48+
s4 = _mm_extract_pi16(result, 3);
49+
assert(s4 == SHRT_MAX);
50+
51+
__m128i x = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
52+
__m128i y = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
53+
__m128i result128 = _mm_subs_epu16(x, y);
54+
short s = _mm_extract_epi16(result128, 0);
55+
assert(s == 0);
56+
1357
return 0;
1458
}

src/ansi-c/library/gcc.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,24 @@ __CPROVER_HIDE:;
192192
return size <= sizeof(__CPROVER_size_t);
193193
}
194194

195+
/* FUNCTION: __builtin_ia32_vec_ext_v4hi */
196+
197+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
198+
199+
short __builtin_ia32_vec_ext_v4hi(__gcc_v4hi vec, int offset)
200+
{
201+
return *((short *)&vec + offset);
202+
}
203+
204+
/* FUNCTION: __builtin_ia32_vec_ext_v8hi */
205+
206+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
207+
208+
short __builtin_ia32_vec_ext_v8hi(__gcc_v8hi vec, int offset)
209+
{
210+
return *((short *)&vec + offset);
211+
}
212+
195213
/* FUNCTION: __builtin_ia32_vec_ext_v4si */
196214

197215
typedef int __gcc_v4si __attribute__((__vector_size__(16)));
@@ -227,3 +245,70 @@ float __builtin_ia32_vec_ext_v4sf(__gcc_v4sf vec, int offset)
227245
{
228246
return *((float *)&vec + offset);
229247
}
248+
249+
/* FUNCTION: __builtin_ia32_psubsw128 */
250+
251+
#ifndef LIBRARY_CHECK
252+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
253+
#else
254+
__gcc_v8hi __CPROVER_saturating_minus();
255+
#endif
256+
257+
__gcc_v8hi __builtin_ia32_psubsw128(__gcc_v8hi a, __gcc_v8hi b)
258+
{
259+
return __CPROVER_saturating_minus(a, b);
260+
}
261+
262+
/* FUNCTION: __builtin_ia32_psubusw128 */
263+
264+
#ifndef LIBRARY_CHECK
265+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
266+
#endif
267+
268+
__gcc_v8hi __builtin_ia32_psubusw128(__gcc_v8hi a, __gcc_v8hi b)
269+
{
270+
typedef unsigned short v8hi_u __attribute__((__vector_size__(16)));
271+
return (__gcc_v8hi)__CPROVER_saturating_minus((v8hi_u)a, (v8hi_u)b);
272+
}
273+
274+
/* FUNCTION: __builtin_ia32_paddsw */
275+
276+
#ifndef LIBRARY_CHECK
277+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
278+
#else
279+
__gcc_v4hi __CPROVER_saturating_plus();
280+
#endif
281+
282+
__gcc_v4hi __builtin_ia32_paddsw(__gcc_v4hi a, __gcc_v4hi b)
283+
{
284+
return __CPROVER_saturating_plus(a, b);
285+
}
286+
287+
/* FUNCTION: __builtin_ia32_psubsw */
288+
289+
#ifndef LIBRARY_CHECK
290+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
291+
#else
292+
__gcc_v4hi __CPROVER_saturating_minus_v4hi(__gcc_v4hi, __gcc_v4hi);
293+
# define __CPROVER_saturating_minus __CPROVER_saturating_minus_v4hi
294+
#endif
295+
296+
__gcc_v4hi __builtin_ia32_psubsw(__gcc_v4hi a, __gcc_v4hi b)
297+
{
298+
return __CPROVER_saturating_minus(a, b);
299+
}
300+
301+
#ifdef LIBRARY_CHECK
302+
# undef __CPROVER_saturating_minus
303+
#endif
304+
305+
/* FUNCTION: __builtin_ia32_vec_init_v4hi */
306+
307+
#ifndef LIBRARY_CHECK
308+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
309+
#endif
310+
311+
__gcc_v4hi __builtin_ia32_vec_init_v4hi(short e0, short e1, short e2, short e3)
312+
{
313+
return (__gcc_v4hi){e0, e1, e2, e3};
314+
}

src/ansi-c/library/intrin.c

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,78 @@ inline __m128i _mm_setr_epi32(int e3, int e2, int e1, int e0)
371371
}
372372
#endif
373373

374+
/* FUNCTION: _mm_set_epi16 */
375+
376+
#ifdef _MSC_VER
377+
# ifndef __CPROVER_shortRIN_H_INCLUDED
378+
# include <shortrin.h>
379+
# define __CPROVER_shortRIN_H_INCLUDED
380+
# endif
381+
382+
inline __m128i _mm_set_epi16(
383+
short e7,
384+
short e6,
385+
short e5,
386+
short e4,
387+
short e3,
388+
short e2,
389+
short e1,
390+
short e0)
391+
{
392+
return (__m128i){.m128i_i16 = {e0, e1, e2, e3, e4, e5, e6, e7}};
393+
}
394+
#endif
395+
396+
/* FUNCTION: _mm_setr_epi16 */
397+
398+
#ifdef _MSC_VER
399+
# ifndef __CPROVER_shortRIN_H_INCLUDED
400+
# include <shortrin.h>
401+
# define __CPROVER_shortRIN_H_INCLUDED
402+
# endif
403+
404+
inline __m128i _mm_setr_epi16(
405+
short e7,
406+
short e6,
407+
short e5,
408+
short e4,
409+
short e3,
410+
short e2,
411+
short e1,
412+
short e0)
413+
{
414+
return (__m128i){.m128i_i16 = {e7, e6, e5, e4, e3, e2, e1, e0}};
415+
}
416+
#endif
417+
418+
/* FUNCTION: _mm_set_pi16 */
419+
420+
#ifdef _MSC_VER
421+
# ifndef __CPROVER_INTRIN_H_INCLUDED
422+
# include <intrin.h>
423+
# define __CPROVER_INTRIN_H_INCLUDED
424+
# endif
425+
426+
inline __m64 _mm_set_pi16(short e3, short e2, short e1, short e0)
427+
{
428+
return (__m64){.m64_i16 = {e0, e1, e2, e3}};
429+
}
430+
#endif
431+
432+
/* FUNCTION: _mm_setr_pi16 */
433+
434+
#ifdef _MSC_VER
435+
# ifndef __CPROVER_shortRIN_H_INCLUDED
436+
# include <shortrin.h>
437+
# define __CPROVER_shortRIN_H_INCLUDED
438+
# endif
439+
440+
inline __m64 _mm_setr_pi16(short e3, short e2, short e1, short e0)
441+
{
442+
return (__m64){.m64_i16 = {e3, e2, e1, e0}};
443+
}
444+
#endif
445+
374446
/* FUNCTION: _mm_extract_epi32 */
375447

376448
#ifdef _MSC_VER
@@ -384,3 +456,17 @@ inline int _mm_extract_epi32(__m128i a, const int imm8)
384456
return a.m128i_i32[imm8];
385457
}
386458
#endif
459+
460+
/* FUNCTION: _mm_extract_pi16 */
461+
462+
#ifdef _MSC_VER
463+
# ifndef __CPROVER_INTRIN_H_INCLUDED
464+
# include <intrin.h>
465+
# define __CPROVER_INTRIN_H_INCLUDED
466+
# endif
467+
468+
inline int _mm_extract_pi16(__m64 a, const int imm8)
469+
{
470+
return a.m64_i16[imm8];
471+
}
472+
#endif

src/goto-programs/remove_vector.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ static bool have_to_remove_vector(const exprt &expr)
2828
expr.id() == ID_plus || expr.id() == ID_minus || expr.id() == ID_mult ||
2929
expr.id() == ID_div || expr.id() == ID_mod || expr.id() == ID_bitxor ||
3030
expr.id() == ID_bitand || expr.id() == ID_bitor || expr.id() == ID_shl ||
31-
expr.id() == ID_lshr || expr.id() == ID_ashr)
31+
expr.id() == ID_lshr || expr.id() == ID_ashr ||
32+
expr.id() == ID_saturating_minus || expr.id() == ID_saturating_plus)
3233
{
3334
return true;
3435
}
@@ -102,7 +103,8 @@ static void remove_vector(exprt &expr)
102103
expr.id() == ID_plus || expr.id() == ID_minus || expr.id() == ID_mult ||
103104
expr.id() == ID_div || expr.id() == ID_mod || expr.id() == ID_bitxor ||
104105
expr.id() == ID_bitand || expr.id() == ID_bitor || expr.id() == ID_shl ||
105-
expr.id() == ID_lshr || expr.id() == ID_ashr)
106+
expr.id() == ID_lshr || expr.id() == ID_ashr ||
107+
expr.id() == ID_saturating_minus || expr.id() == ID_saturating_plus)
106108
{
107109
// FIXME plus, mult, bitxor, bitand and bitor are defined as n-ary
108110
// operations rather than binary. This code assumes that they
@@ -243,6 +245,32 @@ static void remove_vector(exprt &expr)
243245
expr = array_exprt(exprt::operandst(dimension, casted_op), array_type);
244246
expr.add_source_location() = std::move(source_location);
245247
}
248+
else if(
249+
expr.type().id() == ID_vector &&
250+
to_vector_type(expr.type()).size() == to_array_type(op.type()).size())
251+
{
252+
// do component-wise typecast:
253+
// (vector-type) x -> array((vector-sub-type)x[0], ...)
254+
remove_vector(expr.type());
255+
const array_typet &array_type = to_array_type(expr.type());
256+
const std::size_t dimension =
257+
numeric_cast_v<std::size_t>(to_constant_expr(array_type.size()));
258+
const typet subtype = array_type.element_type();
259+
260+
exprt::operandst elements;
261+
elements.reserve(dimension);
262+
263+
for(std::size_t i = 0; i < dimension; i++)
264+
{
265+
exprt index = from_integer(i, array_type.size().type());
266+
elements.push_back(
267+
typecast_exprt{index_exprt{op, std::move(index)}, subtype});
268+
}
269+
270+
array_exprt array_expr(std::move(elements), array_type);
271+
array_expr.add_source_location() = expr.source_location();
272+
expr.swap(array_expr);
273+
}
246274
}
247275
}
248276

0 commit comments

Comments
 (0)