
Commit 625e0a4

[SLP][X86] Add missing SSE2/SSE4 checks from vector rotate tests
1 parent f8a56df commit 625e0a4
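
For context (a descriptive note, not part of the commit itself): the patch splits the baseline and SLM RUN lines into shared-plus-specific FileCheck prefixes (SSE,SSE2 and SSE,SSE4) and adds the previously missing per-target checks for @fshl_v32i16. The new SSE2 block shows the baseline run leaving the rotate pattern scalar, while the SSE4 (SLM) block shows it vectorized into four <8 x i16> @llvm.fshl.v8i16 calls. Below is a condensed sketch of the kind of scalar input IR these checks correspond to; only the first two of the 32 lanes are shown, the exact test body may differ, and @a16/@b16/@d16 are the test file's [32 x i16] globals:

  ; data elements to rotate, loaded from @a16
  %a0 = load i16, ptr @a16, align 2
  %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
  ; per-element rotate amounts, loaded from @b16
  %b0 = load i16, ptr @b16, align 2
  %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
  ; fshl with the same value as both data operands is a rotate-left by the third operand
  %r0 = call i16 @llvm.fshl.i16(i16 %a0, i16 %a0, i16 %b0)
  %r1 = call i16 @llvm.fshl.i16(i16 %a1, i16 %a1, i16 %b1)
  ; results stored back to @d16, giving the SLP vectorizer a contiguous store chain to seed from
  store i16 %r0, ptr @d16, align 2
  store i16 %r1, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 1), align 2

With --check-prefixes, each RUN line matches the shared SSE checks plus only its own SSE2 or SSE4 checks, so one test file can record both the scalar baseline output and the vectorized SLM output.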


2 files changed: +304 -4 lines changed


llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll

Lines changed: 152 additions & 2 deletions
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
@@ -333,6 +333,156 @@ define void @fshl_v16i32() {
 }

 define void @fshl_v32i16() {
+; SSE2-LABEL: @fshl_v32i16(
+; SSE2-NEXT: [[A0:%.*]] = load i16, ptr @a16, align 2
+; SSE2-NEXT: [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; SSE2-NEXT: [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; SSE2-NEXT: [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; SSE2-NEXT: [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; SSE2-NEXT: [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; SSE2-NEXT: [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; SSE2-NEXT: [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; SSE2-NEXT: [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE2-NEXT: [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; SSE2-NEXT: [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; SSE2-NEXT: [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; SSE2-NEXT: [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; SSE2-NEXT: [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; SSE2-NEXT: [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; SSE2-NEXT: [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; SSE2-NEXT: [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE2-NEXT: [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; SSE2-NEXT: [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; SSE2-NEXT: [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; SSE2-NEXT: [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; SSE2-NEXT: [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; SSE2-NEXT: [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; SSE2-NEXT: [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; SSE2-NEXT: [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE2-NEXT: [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; SSE2-NEXT: [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; SSE2-NEXT: [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; SSE2-NEXT: [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; SSE2-NEXT: [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; SSE2-NEXT: [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; SSE2-NEXT: [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; SSE2-NEXT: [[B0:%.*]] = load i16, ptr @b16, align 2
+; SSE2-NEXT: [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; SSE2-NEXT: [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; SSE2-NEXT: [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; SSE2-NEXT: [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; SSE2-NEXT: [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; SSE2-NEXT: [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; SSE2-NEXT: [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; SSE2-NEXT: [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; SSE2-NEXT: [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; SSE2-NEXT: [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; SSE2-NEXT: [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; SSE2-NEXT: [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; SSE2-NEXT: [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; SSE2-NEXT: [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; SSE2-NEXT: [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; SSE2-NEXT: [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; SSE2-NEXT: [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; SSE2-NEXT: [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; SSE2-NEXT: [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; SSE2-NEXT: [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; SSE2-NEXT: [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; SSE2-NEXT: [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; SSE2-NEXT: [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; SSE2-NEXT: [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; SSE2-NEXT: [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; SSE2-NEXT: [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; SSE2-NEXT: [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; SSE2-NEXT: [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; SSE2-NEXT: [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; SSE2-NEXT: [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; SSE2-NEXT: [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
+; SSE2-NEXT: [[R0:%.*]] = call i16 @llvm.fshl.i16(i16 [[A0]], i16 [[A0]], i16 [[B0]])
+; SSE2-NEXT: [[R1:%.*]] = call i16 @llvm.fshl.i16(i16 [[A1]], i16 [[A1]], i16 [[B1]])
+; SSE2-NEXT: [[R2:%.*]] = call i16 @llvm.fshl.i16(i16 [[A2]], i16 [[A2]], i16 [[B2]])
+; SSE2-NEXT: [[R3:%.*]] = call i16 @llvm.fshl.i16(i16 [[A3]], i16 [[A3]], i16 [[B3]])
+; SSE2-NEXT: [[R4:%.*]] = call i16 @llvm.fshl.i16(i16 [[A4]], i16 [[A4]], i16 [[B4]])
+; SSE2-NEXT: [[R5:%.*]] = call i16 @llvm.fshl.i16(i16 [[A5]], i16 [[A5]], i16 [[B5]])
+; SSE2-NEXT: [[R6:%.*]] = call i16 @llvm.fshl.i16(i16 [[A6]], i16 [[A6]], i16 [[B6]])
+; SSE2-NEXT: [[R7:%.*]] = call i16 @llvm.fshl.i16(i16 [[A7]], i16 [[A7]], i16 [[B7]])
+; SSE2-NEXT: [[R8:%.*]] = call i16 @llvm.fshl.i16(i16 [[A8]], i16 [[A8]], i16 [[B8]])
+; SSE2-NEXT: [[R9:%.*]] = call i16 @llvm.fshl.i16(i16 [[A9]], i16 [[A9]], i16 [[B9]])
+; SSE2-NEXT: [[R10:%.*]] = call i16 @llvm.fshl.i16(i16 [[A10]], i16 [[A10]], i16 [[B10]])
+; SSE2-NEXT: [[R11:%.*]] = call i16 @llvm.fshl.i16(i16 [[A11]], i16 [[A11]], i16 [[B11]])
+; SSE2-NEXT: [[R12:%.*]] = call i16 @llvm.fshl.i16(i16 [[A12]], i16 [[A12]], i16 [[B12]])
+; SSE2-NEXT: [[R13:%.*]] = call i16 @llvm.fshl.i16(i16 [[A13]], i16 [[A13]], i16 [[B13]])
+; SSE2-NEXT: [[R14:%.*]] = call i16 @llvm.fshl.i16(i16 [[A14]], i16 [[A14]], i16 [[B14]])
+; SSE2-NEXT: [[R15:%.*]] = call i16 @llvm.fshl.i16(i16 [[A15]], i16 [[A15]], i16 [[B15]])
+; SSE2-NEXT: [[R16:%.*]] = call i16 @llvm.fshl.i16(i16 [[A16]], i16 [[A16]], i16 [[B16]])
+; SSE2-NEXT: [[R17:%.*]] = call i16 @llvm.fshl.i16(i16 [[A17]], i16 [[A17]], i16 [[B17]])
+; SSE2-NEXT: [[R18:%.*]] = call i16 @llvm.fshl.i16(i16 [[A18]], i16 [[A18]], i16 [[B18]])
+; SSE2-NEXT: [[R19:%.*]] = call i16 @llvm.fshl.i16(i16 [[A19]], i16 [[A19]], i16 [[B19]])
+; SSE2-NEXT: [[R20:%.*]] = call i16 @llvm.fshl.i16(i16 [[A20]], i16 [[A20]], i16 [[B20]])
+; SSE2-NEXT: [[R21:%.*]] = call i16 @llvm.fshl.i16(i16 [[A21]], i16 [[A21]], i16 [[B21]])
+; SSE2-NEXT: [[R22:%.*]] = call i16 @llvm.fshl.i16(i16 [[A22]], i16 [[A22]], i16 [[B22]])
+; SSE2-NEXT: [[R23:%.*]] = call i16 @llvm.fshl.i16(i16 [[A23]], i16 [[A23]], i16 [[B23]])
+; SSE2-NEXT: [[R24:%.*]] = call i16 @llvm.fshl.i16(i16 [[A24]], i16 [[A24]], i16 [[B24]])
+; SSE2-NEXT: [[R25:%.*]] = call i16 @llvm.fshl.i16(i16 [[A25]], i16 [[A25]], i16 [[B25]])
+; SSE2-NEXT: [[R26:%.*]] = call i16 @llvm.fshl.i16(i16 [[A26]], i16 [[A26]], i16 [[B26]])
+; SSE2-NEXT: [[R27:%.*]] = call i16 @llvm.fshl.i16(i16 [[A27]], i16 [[A27]], i16 [[B27]])
+; SSE2-NEXT: [[R28:%.*]] = call i16 @llvm.fshl.i16(i16 [[A28]], i16 [[A28]], i16 [[B28]])
+; SSE2-NEXT: [[R29:%.*]] = call i16 @llvm.fshl.i16(i16 [[A29]], i16 [[A29]], i16 [[B29]])
+; SSE2-NEXT: [[R30:%.*]] = call i16 @llvm.fshl.i16(i16 [[A30]], i16 [[A30]], i16 [[B30]])
+; SSE2-NEXT: [[R31:%.*]] = call i16 @llvm.fshl.i16(i16 [[A31]], i16 [[A31]], i16 [[B31]])
+; SSE2-NEXT: store i16 [[R0]], ptr @d16, align 2
+; SSE2-NEXT: store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 1), align 2
+; SSE2-NEXT: store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 2), align 2
+; SSE2-NEXT: store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 3), align 2
+; SSE2-NEXT: store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 4), align 2
+; SSE2-NEXT: store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 5), align 2
+; SSE2-NEXT: store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 6), align 2
+; SSE2-NEXT: store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 7), align 2
+; SSE2-NEXT: store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8), align 2
+; SSE2-NEXT: store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 9), align 2
+; SSE2-NEXT: store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 10), align 2
+; SSE2-NEXT: store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 11), align 2
+; SSE2-NEXT: store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 12), align 2
+; SSE2-NEXT: store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 13), align 2
+; SSE2-NEXT: store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 14), align 2
+; SSE2-NEXT: store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 15), align 2
+; SSE2-NEXT: store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2
+; SSE2-NEXT: store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 17), align 2
+; SSE2-NEXT: store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 18), align 2
+; SSE2-NEXT: store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 19), align 2
+; SSE2-NEXT: store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 20), align 2
+; SSE2-NEXT: store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 21), align 2
+; SSE2-NEXT: store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 22), align 2
+; SSE2-NEXT: store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 23), align 2
+; SSE2-NEXT: store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2
+; SSE2-NEXT: store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 25), align 2
+; SSE2-NEXT: store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 26), align 2
+; SSE2-NEXT: store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 27), align 2
+; SSE2-NEXT: store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 28), align 2
+; SSE2-NEXT: store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 29), align 2
+; SSE2-NEXT: store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 30), align 2
+; SSE2-NEXT: store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 31), align 2
+; SSE2-NEXT: ret void
+;
+; SSE4-LABEL: @fshl_v32i16(
+; SSE4-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE4-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
+; SSE4-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; SSE4-NEXT: store <8 x i16> [[TMP3]], ptr @d16, align 2
+; SSE4-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE4-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; SSE4-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+; SSE4-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8), align 2
+; SSE4-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE4-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; SSE4-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
+; SSE4-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2
+; SSE4-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE4-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; SSE4-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
+; SSE4-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2
+; SSE4-NEXT: ret void
+;
 ; AVX-LABEL: @fshl_v32i16(
 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
 ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
