|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2 |
| -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE |
3 |
| -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE |
| 2 | +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2 |
| 3 | +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE4 |
4 | 4 | ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
|
5 | 5 | ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
|
6 | 6 | ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
|
@@ -333,6 +333,156 @@ define void @fshl_v16i32() {
|
333 | 333 | }
|
334 | 334 |
|
335 | 335 | define void @fshl_v32i16() {
|
| 336 | +; SSE2-LABEL: @fshl_v32i16( |
| 337 | +; SSE2-NEXT: [[A0:%.*]] = load i16, ptr @a16, align 2 |
| 338 | +; SSE2-NEXT: [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2 |
| 339 | +; SSE2-NEXT: [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2 |
| 340 | +; SSE2-NEXT: [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2 |
| 341 | +; SSE2-NEXT: [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2 |
| 342 | +; SSE2-NEXT: [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2 |
| 343 | +; SSE2-NEXT: [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2 |
| 344 | +; SSE2-NEXT: [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2 |
| 345 | +; SSE2-NEXT: [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2 |
| 346 | +; SSE2-NEXT: [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2 |
| 347 | +; SSE2-NEXT: [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2 |
| 348 | +; SSE2-NEXT: [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2 |
| 349 | +; SSE2-NEXT: [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2 |
| 350 | +; SSE2-NEXT: [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2 |
| 351 | +; SSE2-NEXT: [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2 |
| 352 | +; SSE2-NEXT: [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2 |
| 353 | +; SSE2-NEXT: [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2 |
| 354 | +; SSE2-NEXT: [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2 |
| 355 | +; SSE2-NEXT: [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2 |
| 356 | +; SSE2-NEXT: [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2 |
| 357 | +; SSE2-NEXT: [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2 |
| 358 | +; SSE2-NEXT: [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2 |
| 359 | +; SSE2-NEXT: [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2 |
| 360 | +; SSE2-NEXT: [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2 |
| 361 | +; SSE2-NEXT: [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2 |
| 362 | +; SSE2-NEXT: [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2 |
| 363 | +; SSE2-NEXT: [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2 |
| 364 | +; SSE2-NEXT: [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2 |
| 365 | +; SSE2-NEXT: [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2 |
| 366 | +; SSE2-NEXT: [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2 |
| 367 | +; SSE2-NEXT: [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2 |
| 368 | +; SSE2-NEXT: [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2 |
| 369 | +; SSE2-NEXT: [[B0:%.*]] = load i16, ptr @b16, align 2 |
| 370 | +; SSE2-NEXT: [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2 |
| 371 | +; SSE2-NEXT: [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2 |
| 372 | +; SSE2-NEXT: [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2 |
| 373 | +; SSE2-NEXT: [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2 |
| 374 | +; SSE2-NEXT: [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2 |
| 375 | +; SSE2-NEXT: [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2 |
| 376 | +; SSE2-NEXT: [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2 |
| 377 | +; SSE2-NEXT: [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2 |
| 378 | +; SSE2-NEXT: [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2 |
| 379 | +; SSE2-NEXT: [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2 |
| 380 | +; SSE2-NEXT: [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2 |
| 381 | +; SSE2-NEXT: [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2 |
| 382 | +; SSE2-NEXT: [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2 |
| 383 | +; SSE2-NEXT: [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2 |
| 384 | +; SSE2-NEXT: [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2 |
| 385 | +; SSE2-NEXT: [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2 |
| 386 | +; SSE2-NEXT: [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2 |
| 387 | +; SSE2-NEXT: [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2 |
| 388 | +; SSE2-NEXT: [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2 |
| 389 | +; SSE2-NEXT: [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2 |
| 390 | +; SSE2-NEXT: [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2 |
| 391 | +; SSE2-NEXT: [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2 |
| 392 | +; SSE2-NEXT: [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2 |
| 393 | +; SSE2-NEXT: [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2 |
| 394 | +; SSE2-NEXT: [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2 |
| 395 | +; SSE2-NEXT: [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2 |
| 396 | +; SSE2-NEXT: [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2 |
| 397 | +; SSE2-NEXT: [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2 |
| 398 | +; SSE2-NEXT: [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2 |
| 399 | +; SSE2-NEXT: [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2 |
| 400 | +; SSE2-NEXT: [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2 |
| 401 | +; SSE2-NEXT: [[R0:%.*]] = call i16 @llvm.fshl.i16(i16 [[A0]], i16 [[A0]], i16 [[B0]]) |
| 402 | +; SSE2-NEXT: [[R1:%.*]] = call i16 @llvm.fshl.i16(i16 [[A1]], i16 [[A1]], i16 [[B1]]) |
| 403 | +; SSE2-NEXT: [[R2:%.*]] = call i16 @llvm.fshl.i16(i16 [[A2]], i16 [[A2]], i16 [[B2]]) |
| 404 | +; SSE2-NEXT: [[R3:%.*]] = call i16 @llvm.fshl.i16(i16 [[A3]], i16 [[A3]], i16 [[B3]]) |
| 405 | +; SSE2-NEXT: [[R4:%.*]] = call i16 @llvm.fshl.i16(i16 [[A4]], i16 [[A4]], i16 [[B4]]) |
| 406 | +; SSE2-NEXT: [[R5:%.*]] = call i16 @llvm.fshl.i16(i16 [[A5]], i16 [[A5]], i16 [[B5]]) |
| 407 | +; SSE2-NEXT: [[R6:%.*]] = call i16 @llvm.fshl.i16(i16 [[A6]], i16 [[A6]], i16 [[B6]]) |
| 408 | +; SSE2-NEXT: [[R7:%.*]] = call i16 @llvm.fshl.i16(i16 [[A7]], i16 [[A7]], i16 [[B7]]) |
| 409 | +; SSE2-NEXT: [[R8:%.*]] = call i16 @llvm.fshl.i16(i16 [[A8]], i16 [[A8]], i16 [[B8]]) |
| 410 | +; SSE2-NEXT: [[R9:%.*]] = call i16 @llvm.fshl.i16(i16 [[A9]], i16 [[A9]], i16 [[B9]]) |
| 411 | +; SSE2-NEXT: [[R10:%.*]] = call i16 @llvm.fshl.i16(i16 [[A10]], i16 [[A10]], i16 [[B10]]) |
| 412 | +; SSE2-NEXT: [[R11:%.*]] = call i16 @llvm.fshl.i16(i16 [[A11]], i16 [[A11]], i16 [[B11]]) |
| 413 | +; SSE2-NEXT: [[R12:%.*]] = call i16 @llvm.fshl.i16(i16 [[A12]], i16 [[A12]], i16 [[B12]]) |
| 414 | +; SSE2-NEXT: [[R13:%.*]] = call i16 @llvm.fshl.i16(i16 [[A13]], i16 [[A13]], i16 [[B13]]) |
| 415 | +; SSE2-NEXT: [[R14:%.*]] = call i16 @llvm.fshl.i16(i16 [[A14]], i16 [[A14]], i16 [[B14]]) |
| 416 | +; SSE2-NEXT: [[R15:%.*]] = call i16 @llvm.fshl.i16(i16 [[A15]], i16 [[A15]], i16 [[B15]]) |
| 417 | +; SSE2-NEXT: [[R16:%.*]] = call i16 @llvm.fshl.i16(i16 [[A16]], i16 [[A16]], i16 [[B16]]) |
| 418 | +; SSE2-NEXT: [[R17:%.*]] = call i16 @llvm.fshl.i16(i16 [[A17]], i16 [[A17]], i16 [[B17]]) |
| 419 | +; SSE2-NEXT: [[R18:%.*]] = call i16 @llvm.fshl.i16(i16 [[A18]], i16 [[A18]], i16 [[B18]]) |
| 420 | +; SSE2-NEXT: [[R19:%.*]] = call i16 @llvm.fshl.i16(i16 [[A19]], i16 [[A19]], i16 [[B19]]) |
| 421 | +; SSE2-NEXT: [[R20:%.*]] = call i16 @llvm.fshl.i16(i16 [[A20]], i16 [[A20]], i16 [[B20]]) |
| 422 | +; SSE2-NEXT: [[R21:%.*]] = call i16 @llvm.fshl.i16(i16 [[A21]], i16 [[A21]], i16 [[B21]]) |
| 423 | +; SSE2-NEXT: [[R22:%.*]] = call i16 @llvm.fshl.i16(i16 [[A22]], i16 [[A22]], i16 [[B22]]) |
| 424 | +; SSE2-NEXT: [[R23:%.*]] = call i16 @llvm.fshl.i16(i16 [[A23]], i16 [[A23]], i16 [[B23]]) |
| 425 | +; SSE2-NEXT: [[R24:%.*]] = call i16 @llvm.fshl.i16(i16 [[A24]], i16 [[A24]], i16 [[B24]]) |
| 426 | +; SSE2-NEXT: [[R25:%.*]] = call i16 @llvm.fshl.i16(i16 [[A25]], i16 [[A25]], i16 [[B25]]) |
| 427 | +; SSE2-NEXT: [[R26:%.*]] = call i16 @llvm.fshl.i16(i16 [[A26]], i16 [[A26]], i16 [[B26]]) |
| 428 | +; SSE2-NEXT: [[R27:%.*]] = call i16 @llvm.fshl.i16(i16 [[A27]], i16 [[A27]], i16 [[B27]]) |
| 429 | +; SSE2-NEXT: [[R28:%.*]] = call i16 @llvm.fshl.i16(i16 [[A28]], i16 [[A28]], i16 [[B28]]) |
| 430 | +; SSE2-NEXT: [[R29:%.*]] = call i16 @llvm.fshl.i16(i16 [[A29]], i16 [[A29]], i16 [[B29]]) |
| 431 | +; SSE2-NEXT: [[R30:%.*]] = call i16 @llvm.fshl.i16(i16 [[A30]], i16 [[A30]], i16 [[B30]]) |
| 432 | +; SSE2-NEXT: [[R31:%.*]] = call i16 @llvm.fshl.i16(i16 [[A31]], i16 [[A31]], i16 [[B31]]) |
| 433 | +; SSE2-NEXT: store i16 [[R0]], ptr @d16, align 2 |
| 434 | +; SSE2-NEXT: store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 1), align 2 |
| 435 | +; SSE2-NEXT: store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 2), align 2 |
| 436 | +; SSE2-NEXT: store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 3), align 2 |
| 437 | +; SSE2-NEXT: store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 4), align 2 |
| 438 | +; SSE2-NEXT: store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 5), align 2 |
| 439 | +; SSE2-NEXT: store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 6), align 2 |
| 440 | +; SSE2-NEXT: store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 7), align 2 |
| 441 | +; SSE2-NEXT: store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8), align 2 |
| 442 | +; SSE2-NEXT: store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 9), align 2 |
| 443 | +; SSE2-NEXT: store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 10), align 2 |
| 444 | +; SSE2-NEXT: store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 11), align 2 |
| 445 | +; SSE2-NEXT: store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 12), align 2 |
| 446 | +; SSE2-NEXT: store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 13), align 2 |
| 447 | +; SSE2-NEXT: store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 14), align 2 |
| 448 | +; SSE2-NEXT: store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 15), align 2 |
| 449 | +; SSE2-NEXT: store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2 |
| 450 | +; SSE2-NEXT: store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 17), align 2 |
| 451 | +; SSE2-NEXT: store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 18), align 2 |
| 452 | +; SSE2-NEXT: store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 19), align 2 |
| 453 | +; SSE2-NEXT: store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 20), align 2 |
| 454 | +; SSE2-NEXT: store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 21), align 2 |
| 455 | +; SSE2-NEXT: store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 22), align 2 |
| 456 | +; SSE2-NEXT: store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 23), align 2 |
| 457 | +; SSE2-NEXT: store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2 |
| 458 | +; SSE2-NEXT: store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 25), align 2 |
| 459 | +; SSE2-NEXT: store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 26), align 2 |
| 460 | +; SSE2-NEXT: store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 27), align 2 |
| 461 | +; SSE2-NEXT: store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 28), align 2 |
| 462 | +; SSE2-NEXT: store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 29), align 2 |
| 463 | +; SSE2-NEXT: store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 30), align 2 |
| 464 | +; SSE2-NEXT: store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 31), align 2 |
| 465 | +; SSE2-NEXT: ret void |
| 466 | +; |
| 467 | +; SSE4-LABEL: @fshl_v32i16( |
| 468 | +; SSE4-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2 |
| 469 | +; SSE4-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2 |
| 470 | +; SSE4-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) |
| 471 | +; SSE4-NEXT: store <8 x i16> [[TMP3]], ptr @d16, align 2 |
| 472 | +; SSE4-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2 |
| 473 | +; SSE4-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2 |
| 474 | +; SSE4-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]]) |
| 475 | +; SSE4-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8), align 2 |
| 476 | +; SSE4-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2 |
| 477 | +; SSE4-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2 |
| 478 | +; SSE4-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) |
| 479 | +; SSE4-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2 |
| 480 | +; SSE4-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2 |
| 481 | +; SSE4-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2 |
| 482 | +; SSE4-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) |
| 483 | +; SSE4-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2 |
| 484 | +; SSE4-NEXT: ret void |
| 485 | +; |
336 | 486 | ; AVX-LABEL: @fshl_v32i16(
|
337 | 487 | ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
|
338 | 488 | ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
|
|
0 commit comments