Skip to content

Commit 10ea245

Browse files
committed
Add support for saturating add/subtract MMX instructions
With the newly added saturating addition/subtraction it is possible to support MMX instructions performing saturating arithmetic over vectors.
1 parent 56a7a49 commit 10ea245

File tree

4 files changed

+355
-2
lines changed

4 files changed

+355
-2
lines changed

regression/cbmc/SIMD1/main.c

+44
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#include <assert.h>
2+
#include <limits.h>
3+
24
#ifdef _MSC_VER
35
# include <intrin.h>
6+
# ifdef _WIN64
7+
# define _mm_extract_pi16(a, b) _mm_extract_epi16(a, b)
8+
# endif
49
#else
510
# include <immintrin.h>
611
#endif
@@ -10,5 +15,44 @@ int main()
1015
__m128i values = _mm_setr_epi32(0x1234, 0x2345, 0x3456, 0x4567);
1116
int val1 = _mm_extract_epi32(values, 0);
1217
assert(val1 == 0x1234);
18+
19+
#ifndef _WIN64
20+
__m64 a = _mm_setr_pi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX);
21+
__m64 b = _mm_set_pi16(1, 1, 10, 1);
22+
__m64 result = _mm_subs_pi16(a, b);
23+
#else
24+
__m128i a = _mm_setr_epi16(SHRT_MIN, 10, SHRT_MIN + 1, SHRT_MAX, 0, 0, 0, 0);
25+
__m128i b = _mm_set_epi16(0, 0, 0, 0, 1, 1, 10, 1);
26+
__m128i result = _mm_subs_epi16(a, b);
27+
#endif
28+
short s1 = _mm_extract_pi16(result, 0);
29+
assert(s1 == SHRT_MIN);
30+
short s2 = _mm_extract_pi16(result, 1);
31+
assert(s2 == 0);
32+
short s3 = _mm_extract_pi16(result, 2);
33+
assert(s3 == SHRT_MIN);
34+
short s4 = _mm_extract_pi16(result, 3);
35+
assert(s4 == SHRT_MAX - 1);
36+
37+
#ifndef _WIN64
38+
result = _mm_adds_pi16(a, b);
39+
#else
40+
result = _mm_adds_epi16(a, b);
41+
#endif
42+
s1 = _mm_extract_pi16(result, 0);
43+
assert(s1 == SHRT_MIN + 1);
44+
s2 = _mm_extract_pi16(result, 1);
45+
assert(s2 == 20);
46+
s3 = _mm_extract_pi16(result, 2);
47+
assert(s3 == SHRT_MIN + 2);
48+
s4 = _mm_extract_pi16(result, 3);
49+
assert(s4 == SHRT_MAX);
50+
51+
__m128i x = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
52+
__m128i y = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
53+
__m128i result128 = _mm_subs_epu16(x, y);
54+
short s = _mm_extract_epi16(result128, 0);
55+
assert(s == 0);
56+
1357
return 0;
1458
}

src/ansi-c/library/gcc.c

+85
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,24 @@ __CPROVER_HIDE:;
192192
return size <= sizeof(__CPROVER_size_t);
193193
}
194194

195+
/* FUNCTION: __builtin_ia32_vec_ext_v4hi */
196+
197+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
198+
199+
short __builtin_ia32_vec_ext_v4hi(__gcc_v4hi vec, int offset)
200+
{
201+
return *((short *)&vec + offset);
202+
}
203+
204+
/* FUNCTION: __builtin_ia32_vec_ext_v8hi */
205+
206+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
207+
208+
short __builtin_ia32_vec_ext_v8hi(__gcc_v8hi vec, int offset)
209+
{
210+
return *((short *)&vec + offset);
211+
}
212+
195213
/* FUNCTION: __builtin_ia32_vec_ext_v4si */
196214

197215
typedef int __gcc_v4si __attribute__((__vector_size__(16)));
@@ -227,3 +245,70 @@ float __builtin_ia32_vec_ext_v4sf(__gcc_v4sf vec, int offset)
227245
{
228246
return *((float *)&vec + offset);
229247
}
248+
249+
/* FUNCTION: __builtin_ia32_psubsw128 */
250+
251+
#ifndef LIBRARY_CHECK
252+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
253+
#else
254+
__gcc_v8hi __CPROVER_saturating_minus();
255+
#endif
256+
257+
__gcc_v8hi __builtin_ia32_psubsw128(__gcc_v8hi a, __gcc_v8hi b)
258+
{
259+
return __CPROVER_saturating_minus(a, b);
260+
}
261+
262+
/* FUNCTION: __builtin_ia32_psubusw128 */
263+
264+
#ifndef LIBRARY_CHECK
265+
typedef short __gcc_v8hi __attribute__((__vector_size__(16)));
266+
#endif
267+
268+
__gcc_v8hi __builtin_ia32_psubusw128(__gcc_v8hi a, __gcc_v8hi b)
269+
{
270+
typedef unsigned short v8hi_u __attribute__((__vector_size__(16)));
271+
return (__gcc_v8hi)__CPROVER_saturating_minus((v8hi_u)a, (v8hi_u)b);
272+
}
273+
274+
/* FUNCTION: __builtin_ia32_paddsw */
275+
276+
#ifndef LIBRARY_CHECK
277+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
278+
#else
279+
__gcc_v4hi __CPROVER_saturating_plus();
280+
#endif
281+
282+
__gcc_v4hi __builtin_ia32_paddsw(__gcc_v4hi a, __gcc_v4hi b)
283+
{
284+
return __CPROVER_saturating_plus(a, b);
285+
}
286+
287+
/* FUNCTION: __builtin_ia32_psubsw */
288+
289+
#ifndef LIBRARY_CHECK
290+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
291+
#else
292+
__gcc_v4hi __CPROVER_saturating_minus_v4hi(__gcc_v4hi, __gcc_v4hi);
293+
# define __CPROVER_saturating_minus __CPROVER_saturating_minus_v4hi
294+
#endif
295+
296+
__gcc_v4hi __builtin_ia32_psubsw(__gcc_v4hi a, __gcc_v4hi b)
297+
{
298+
return __CPROVER_saturating_minus(a, b);
299+
}
300+
301+
#ifdef LIBRARY_CHECK
302+
# undef __CPROVER_saturating_minus
303+
#endif
304+
305+
/* FUNCTION: __builtin_ia32_vec_init_v4hi */
306+
307+
#ifndef LIBRARY_CHECK
308+
typedef short __gcc_v4hi __attribute__((__vector_size__(8)));
309+
#endif
310+
311+
__gcc_v4hi __builtin_ia32_vec_init_v4hi(short e0, short e1, short e2, short e3)
312+
{
313+
return (__gcc_v4hi){e0, e1, e2, e3};
314+
}

src/ansi-c/library/intrin.c

+196
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,78 @@ inline __m128i _mm_setr_epi32(int e3, int e2, int e1, int e0)
371371
}
372372
#endif
373373

374+
/* FUNCTION: _mm_set_epi16 */
375+
376+
#ifdef _MSC_VER
377+
# ifndef __CPROVER_INTRIN_H_INCLUDED
378+
# include <intrin.h>
379+
# define __CPROVER_INTRIN_H_INCLUDED
380+
# endif
381+
382+
inline __m128i _mm_set_epi16(
383+
short e7,
384+
short e6,
385+
short e5,
386+
short e4,
387+
short e3,
388+
short e2,
389+
short e1,
390+
short e0)
391+
{
392+
return (__m128i){.m128i_i16 = {e0, e1, e2, e3, e4, e5, e6, e7}};
393+
}
394+
#endif
395+
396+
/* FUNCTION: _mm_setr_epi16 */
397+
398+
#ifdef _MSC_VER
399+
# ifndef __CPROVER_INTRIN_H_INCLUDED
400+
# include <intrin.h>
401+
# define __CPROVER_INTRIN_H_INCLUDED
402+
# endif
403+
404+
inline __m128i _mm_setr_epi16(
405+
short e7,
406+
short e6,
407+
short e5,
408+
short e4,
409+
short e3,
410+
short e2,
411+
short e1,
412+
short e0)
413+
{
414+
return (__m128i){.m128i_i16 = {e7, e6, e5, e4, e3, e2, e1, e0}};
415+
}
416+
#endif
417+
418+
/* FUNCTION: _mm_set_pi16 */
419+
420+
#ifdef _MSC_VER
421+
# ifndef __CPROVER_INTRIN_H_INCLUDED
422+
# include <intrin.h>
423+
# define __CPROVER_INTRIN_H_INCLUDED
424+
# endif
425+
426+
inline __m64 _mm_set_pi16(short e3, short e2, short e1, short e0)
427+
{
428+
return (__m64){.m64_i16 = {e0, e1, e2, e3}};
429+
}
430+
#endif
431+
432+
/* FUNCTION: _mm_setr_pi16 */
433+
434+
#ifdef _MSC_VER
435+
# ifndef __CPROVER_INTRIN_H_INCLUDED
436+
# include <intrin.h>
437+
# define __CPROVER_INTRIN_H_INCLUDED
438+
# endif
439+
440+
inline __m64 _mm_setr_pi16(short e3, short e2, short e1, short e0)
441+
{
442+
return (__m64){.m64_i16 = {e3, e2, e1, e0}};
443+
}
444+
#endif
445+
374446
/* FUNCTION: _mm_extract_epi32 */
375447

376448
#ifdef _MSC_VER
@@ -384,3 +456,127 @@ inline int _mm_extract_epi32(__m128i a, const int imm8)
384456
return a.m128i_i32[imm8];
385457
}
386458
#endif
459+
460+
/* FUNCTION: _mm_extract_epi16 */
461+
462+
#ifdef _MSC_VER
463+
# ifndef __CPROVER_INTRIN_H_INCLUDED
464+
# include <intrin.h>
465+
# define __CPROVER_INTRIN_H_INCLUDED
466+
# endif
467+
468+
inline int _mm_extract_epi16(__m128i a, const int imm8)
469+
{
470+
return a.m128i_i16[imm8];
471+
}
472+
#endif
473+
474+
/* FUNCTION: _mm_extract_pi16 */
475+
476+
#ifdef _MSC_VER
477+
# ifndef __CPROVER_INTRIN_H_INCLUDED
478+
# include <intrin.h>
479+
# define __CPROVER_INTRIN_H_INCLUDED
480+
# endif
481+
482+
inline int _mm_extract_pi16(__m64 a, const int imm8)
483+
{
484+
return a.m64_i16[imm8];
485+
}
486+
#endif
487+
488+
/* FUNCTION: _mm_adds_epi16 */
489+
490+
#ifdef _MSC_VER
491+
# ifndef __CPROVER_INTRIN_H_INCLUDED
492+
# include <intrin.h>
493+
# define __CPROVER_INTRIN_H_INCLUDED
494+
# endif
495+
496+
inline __m128i _mm_adds_epi16(__m128i a, __m128i b)
497+
{
498+
return (__m128i){
499+
.m128i_i16 = {
500+
__CPROVER_saturating_plus(a.m128i_i16[0], b.m128i_i16[0]),
501+
__CPROVER_saturating_plus(a.m128i_i16[1], b.m128i_i16[1]),
502+
__CPROVER_saturating_plus(a.m128i_i16[2], b.m128i_i16[2]),
503+
__CPROVER_saturating_plus(a.m128i_i16[3], b.m128i_i16[3]),
504+
__CPROVER_saturating_plus(a.m128i_i16[4], b.m128i_i16[4]),
505+
__CPROVER_saturating_plus(a.m128i_i16[5], b.m128i_i16[5]),
506+
__CPROVER_saturating_plus(a.m128i_i16[6], b.m128i_i16[6]),
507+
__CPROVER_saturating_plus(a.m128i_i16[7], b.m128i_i16[7]),
508+
}};
509+
}
510+
#endif
511+
512+
/* FUNCTION: _mm_subs_epi16 */
513+
514+
#ifdef _MSC_VER
515+
# ifndef __CPROVER_INTRIN_H_INCLUDED
516+
# include <intrin.h>
517+
# define __CPROVER_INTRIN_H_INCLUDED
518+
# endif
519+
520+
inline __m128i _mm_subs_epi16(__m128i a, __m128i b)
521+
{
522+
return (__m128i){
523+
.m128i_i16 = {
524+
__CPROVER_saturating_minus(a.m128i_i16[0], b.m128i_i16[0]),
525+
__CPROVER_saturating_minus(a.m128i_i16[1], b.m128i_i16[1]),
526+
__CPROVER_saturating_minus(a.m128i_i16[2], b.m128i_i16[2]),
527+
__CPROVER_saturating_minus(a.m128i_i16[3], b.m128i_i16[3]),
528+
__CPROVER_saturating_minus(a.m128i_i16[4], b.m128i_i16[4]),
529+
__CPROVER_saturating_minus(a.m128i_i16[5], b.m128i_i16[5]),
530+
__CPROVER_saturating_minus(a.m128i_i16[6], b.m128i_i16[6]),
531+
__CPROVER_saturating_minus(a.m128i_i16[7], b.m128i_i16[7]),
532+
}};
533+
}
534+
#endif
535+
536+
/* FUNCTION: _mm_subs_epi16 */
537+
538+
#ifdef _MSC_VER
539+
# ifndef __CPROVER_INTRIN_H_INCLUDED
540+
# include <intrin.h>
541+
# define __CPROVER_INTRIN_H_INCLUDED
542+
# endif
543+
544+
inline __m128i _mm_subs_epi16(__m128i a, __m128i b)
545+
{
546+
return (__m128i){
547+
.m128i_i16 = {
548+
__CPROVER_saturating_minus(a.m128i_i16[0], b.m128i_i16[0]),
549+
__CPROVER_saturating_minus(a.m128i_i16[1], b.m128i_i16[1]),
550+
__CPROVER_saturating_minus(a.m128i_i16[2], b.m128i_i16[2]),
551+
__CPROVER_saturating_minus(a.m128i_i16[3], b.m128i_i16[3]),
552+
__CPROVER_saturating_minus(a.m128i_i16[4], b.m128i_i16[4]),
553+
__CPROVER_saturating_minus(a.m128i_i16[5], b.m128i_i16[5]),
554+
__CPROVER_saturating_minus(a.m128i_i16[6], b.m128i_i16[6]),
555+
__CPROVER_saturating_minus(a.m128i_i16[7], b.m128i_i16[7]),
556+
}};
557+
}
558+
#endif
559+
560+
/* FUNCTION: _mm_subs_epu16 */
561+
562+
#ifdef _MSC_VER
563+
# ifndef __CPROVER_INTRIN_H_INCLUDED
564+
# include <intrin.h>
565+
# define __CPROVER_INTRIN_H_INCLUDED
566+
# endif
567+
568+
inline __m128i _mm_subs_epu16(__m128i a, __m128i b)
569+
{
570+
return (__m128i){
571+
.m128i_u16 = {
572+
__CPROVER_saturating_minus(a.m128i_u16[0], b.m128i_u16[0]),
573+
__CPROVER_saturating_minus(a.m128i_u16[1], b.m128i_u16[1]),
574+
__CPROVER_saturating_minus(a.m128i_u16[2], b.m128i_u16[2]),
575+
__CPROVER_saturating_minus(a.m128i_u16[3], b.m128i_u16[3]),
576+
__CPROVER_saturating_minus(a.m128i_u16[4], b.m128i_u16[4]),
577+
__CPROVER_saturating_minus(a.m128i_u16[5], b.m128i_u16[5]),
578+
__CPROVER_saturating_minus(a.m128i_u16[6], b.m128i_u16[6]),
579+
__CPROVER_saturating_minus(a.m128i_u16[7], b.m128i_u16[7]),
580+
}};
581+
}
582+
#endif

0 commit comments

Comments
 (0)