|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
|
3 |
| -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-SDAG %s |
4 |
| -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-GISEL %s |
5 |
| -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-SDAG %s |
6 |
| -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-GISEL %s |
| 3 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-SDAG,GFX90A-SDAG %s |
| 4 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-GISEL,GFX90A-GISEL %s |
| 5 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-SDAG,GFX942-SDAG %s |
| 6 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=PACKED,PACKED-GISEL,GFX942-GISEL %s |
7 | 7 |
|
8 | 8 | define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
|
9 | 9 | ; GFX900-LABEL: fadd_v2_vv:
|
@@ -411,10 +411,12 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
|
411 | 411 | ; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
412 | 412 | ; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
413 | 413 | ; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
|
| 414 | +; PACKED-GISEL-NEXT: s_mov_b32 s2, 1.0 |
| 415 | +; PACKED-GISEL-NEXT: s_mov_b32 s3, s2 |
414 | 416 | ; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
415 | 417 | ; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
|
416 | 418 | ; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
|
417 |
| -; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 |
| 419 | +; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] |
418 | 420 | ; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
|
419 | 421 | ; PACKED-GISEL-NEXT: s_endpgm
|
420 | 422 | %id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
@@ -1186,10 +1188,12 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
|
1186 | 1188 | ; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
1187 | 1189 | ; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
|
1188 | 1190 | ; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
|
| 1191 | +; PACKED-GISEL-NEXT: s_mov_b32 s2, 4.0 |
| 1192 | +; PACKED-GISEL-NEXT: s_mov_b32 s3, s2 |
1189 | 1193 | ; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
1190 | 1194 | ; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
|
1191 | 1195 | ; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
|
1192 |
| -; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], v[0:1], 4.0 |
| 1196 | +; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[2:3] |
1193 | 1197 | ; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
|
1194 | 1198 | ; PACKED-GISEL-NEXT: s_endpgm
|
1195 | 1199 | %id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
@@ -1594,6 +1598,40 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
|
1594 | 1598 | ; PACKED-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] op_sel_hi:[1,0,0]
|
1595 | 1599 | ; PACKED-SDAG-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1]
|
1596 | 1600 | ; PACKED-SDAG-NEXT: s_endpgm
|
| 1601 | +; |
| 1602 | +; GFX90A-GISEL-LABEL: fma_v2_v_imm: |
| 1603 | +; GFX90A-GISEL: ; %bb.0: |
| 1604 | +; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1605 | +; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1606 | +; GFX90A-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1607 | +; GFX90A-GISEL-NEXT: s_mov_b32 s4, 0x43480000 |
| 1608 | +; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0x42c80000 |
| 1609 | +; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1610 | +; GFX90A-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1611 | +; GFX90A-GISEL-NEXT: s_mov_b32 s5, s4 |
| 1612 | +; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2 |
| 1613 | +; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] |
| 1614 | +; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1615 | +; GFX90A-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1616 | +; GFX90A-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1617 | +; GFX90A-GISEL-NEXT: s_endpgm |
| 1618 | +; |
| 1619 | +; GFX942-GISEL-LABEL: fma_v2_v_imm: |
| 1620 | +; GFX942-GISEL: ; %bb.0: |
| 1621 | +; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1622 | +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1623 | +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1624 | +; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x43480000 |
| 1625 | +; GFX942-GISEL-NEXT: s_mov_b32 s2, 0x42c80000 |
| 1626 | +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1627 | +; GFX942-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1628 | +; GFX942-GISEL-NEXT: s_mov_b32 s5, s4 |
| 1629 | +; GFX942-GISEL-NEXT: s_mov_b32 s3, s2 |
| 1630 | +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5] |
| 1631 | +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1632 | +; GFX942-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1633 | +; GFX942-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1634 | +; GFX942-GISEL-NEXT: s_endpgm |
1597 | 1635 | %id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
1598 | 1636 | %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
|
1599 | 1637 | %load = load <2 x float>, ptr addrspace(1) %gep, align 8
|
@@ -1675,19 +1713,39 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
|
1675 | 1713 | ; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
|
1676 | 1714 | ; PACKED-SDAG-NEXT: s_endpgm
|
1677 | 1715 | ;
|
1678 |
| -; PACKED-GISEL-LABEL: fma_v2_v_lit_splat: |
1679 |
| -; PACKED-GISEL: ; %bb.0: |
1680 |
| -; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
1681 |
| -; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1682 |
| -; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0 |
1683 |
| -; PACKED-GISEL-NEXT: s_mov_b32 s2, 1.0 |
1684 |
| -; PACKED-GISEL-NEXT: s_mov_b32 s3, s2 |
1685 |
| -; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1686 |
| -; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] |
1687 |
| -; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0) |
1688 |
| -; PACKED-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], 4.0, s[2:3] |
1689 |
| -; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
1690 |
| -; PACKED-GISEL-NEXT: s_endpgm |
| 1716 | +; GFX90A-GISEL-LABEL: fma_v2_v_lit_splat: |
| 1717 | +; GFX90A-GISEL: ; %bb.0: |
| 1718 | +; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1719 | +; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1720 | +; GFX90A-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1721 | +; GFX90A-GISEL-NEXT: s_mov_b32 s4, 1.0 |
| 1722 | +; GFX90A-GISEL-NEXT: s_mov_b32 s2, 4.0 |
| 1723 | +; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1724 | +; GFX90A-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1725 | +; GFX90A-GISEL-NEXT: s_mov_b32 s5, s4 |
| 1726 | +; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2 |
| 1727 | +; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] |
| 1728 | +; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1729 | +; GFX90A-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1730 | +; GFX90A-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1731 | +; GFX90A-GISEL-NEXT: s_endpgm |
| 1732 | +; |
| 1733 | +; GFX942-GISEL-LABEL: fma_v2_v_lit_splat: |
| 1734 | +; GFX942-GISEL: ; %bb.0: |
| 1735 | +; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1736 | +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1737 | +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1738 | +; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0 |
| 1739 | +; GFX942-GISEL-NEXT: s_mov_b32 s2, 4.0 |
| 1740 | +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1741 | +; GFX942-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1742 | +; GFX942-GISEL-NEXT: s_mov_b32 s5, s4 |
| 1743 | +; GFX942-GISEL-NEXT: s_mov_b32 s3, s2 |
| 1744 | +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5] |
| 1745 | +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1746 | +; GFX942-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1747 | +; GFX942-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1748 | +; GFX942-GISEL-NEXT: s_endpgm |
1691 | 1749 | %id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
1692 | 1750 | %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
|
1693 | 1751 | %load = load <2 x float>, ptr addrspace(1) %gep, align 8
|
@@ -1725,6 +1783,40 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
|
1725 | 1783 | ; PACKED-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3]
|
1726 | 1784 | ; PACKED-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
|
1727 | 1785 | ; PACKED-SDAG-NEXT: s_endpgm
|
| 1786 | +; |
| 1787 | +; GFX90A-GISEL-LABEL: fma_v2_v_unfoldable_lit: |
| 1788 | +; GFX90A-GISEL: ; %bb.0: |
| 1789 | +; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1790 | +; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1791 | +; GFX90A-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1792 | +; GFX90A-GISEL-NEXT: s_mov_b32 s4, 1.0 |
| 1793 | +; GFX90A-GISEL-NEXT: s_mov_b32 s2, 4.0 |
| 1794 | +; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1795 | +; GFX90A-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1796 | +; GFX90A-GISEL-NEXT: s_mov_b32 s5, 2.0 |
| 1797 | +; GFX90A-GISEL-NEXT: s_mov_b32 s3, 0x40400000 |
| 1798 | +; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] |
| 1799 | +; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1800 | +; GFX90A-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1801 | +; GFX90A-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1802 | +; GFX90A-GISEL-NEXT: s_endpgm |
| 1803 | +; |
| 1804 | +; GFX942-GISEL-LABEL: fma_v2_v_unfoldable_lit: |
| 1805 | +; GFX942-GISEL: ; %bb.0: |
| 1806 | +; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 1807 | +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1808 | +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0 |
| 1809 | +; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0 |
| 1810 | +; GFX942-GISEL-NEXT: s_mov_b32 s2, 4.0 |
| 1811 | +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 1812 | +; GFX942-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] |
| 1813 | +; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0 |
| 1814 | +; GFX942-GISEL-NEXT: s_mov_b32 s3, 0x40400000 |
| 1815 | +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5] |
| 1816 | +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 1817 | +; GFX942-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3] |
| 1818 | +; GFX942-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] |
| 1819 | +; GFX942-GISEL-NEXT: s_endpgm |
1728 | 1820 | %id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
1729 | 1821 | %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
|
1730 | 1822 | %load = load <2 x float>, ptr addrspace(1) %gep, align 8
|
@@ -2059,6 +2151,37 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
|
2059 | 2151 | ; PACKED-SDAG-NEXT: v_mov_b32_e32 v0, s0
|
2060 | 2152 | ; PACKED-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
|
2061 | 2153 | ; PACKED-SDAG-NEXT: s_endpgm
|
| 2154 | +; |
| 2155 | +; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0: |
| 2156 | +; GFX90A-GISEL: ; %bb.0: ; %bb |
| 2157 | +; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 2158 | +; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0 |
| 2159 | +; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2 |
| 2160 | +; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] |
| 2161 | +; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 2162 | +; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] |
| 2163 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1 |
| 2164 | +; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0 |
| 2165 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| 2166 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v0 |
| 2167 | +; GFX90A-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| 2168 | +; GFX90A-GISEL-NEXT: s_endpgm |
| 2169 | +; |
| 2170 | +; GFX942-GISEL-LABEL: fadd_fadd_fsub_0: |
| 2171 | +; GFX942-GISEL: ; %bb.0: ; %bb |
| 2172 | +; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 2173 | +; GFX942-GISEL-NEXT: s_mov_b32 s2, 0 |
| 2174 | +; GFX942-GISEL-NEXT: s_mov_b32 s3, s2 |
| 2175 | +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3] |
| 2176 | +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 2177 | +; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] |
| 2178 | +; GFX942-GISEL-NEXT: s_nop 0 |
| 2179 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1 |
| 2180 | +; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0 |
| 2181 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| 2182 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v3, v0 |
| 2183 | +; GFX942-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| 2184 | +; GFX942-GISEL-NEXT: s_endpgm |
2062 | 2185 | bb:
|
2063 | 2186 | %i12 = fadd <2 x float> zeroinitializer, %arg
|
2064 | 2187 | %shift8 = shufflevector <2 x float> %i12, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
|
@@ -2099,6 +2222,40 @@ define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, p
|
2099 | 2222 | ; PACKED-SDAG-NEXT: v_pk_add_f32 v[0:1], v[2:3], s[2:3] neg_lo:[0,1] neg_hi:[0,1]
|
2100 | 2223 | ; PACKED-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[6:7]
|
2101 | 2224 | ; PACKED-SDAG-NEXT: s_endpgm
|
| 2225 | +; |
| 2226 | +; GFX90A-GISEL-LABEL: fadd_fadd_fsub: |
| 2227 | +; GFX90A-GISEL: ; %bb.0: ; %bb |
| 2228 | +; GFX90A-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2229 | +; GFX90A-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
| 2230 | +; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 2231 | +; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] |
| 2232 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s2 |
| 2233 | +; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] |
| 2234 | +; GFX90A-GISEL-NEXT: v_sub_f32_e32 v0, s0, v2 |
| 2235 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v1 |
| 2236 | +; GFX90A-GISEL-NEXT: v_pk_add_f32 v[2:3], s[2:3], v[2:3] |
| 2237 | +; GFX90A-GISEL-NEXT: v_subrev_f32_e32 v1, s3, v2 |
| 2238 | +; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 2239 | +; GFX90A-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] |
| 2240 | +; GFX90A-GISEL-NEXT: s_endpgm |
| 2241 | +; |
| 2242 | +; GFX942-GISEL-LABEL: fadd_fadd_fsub: |
| 2243 | +; GFX942-GISEL: ; %bb.0: ; %bb |
| 2244 | +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| 2245 | +; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 |
| 2246 | +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| 2247 | +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3] |
| 2248 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, s2 |
| 2249 | +; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] |
| 2250 | +; GFX942-GISEL-NEXT: s_nop 0 |
| 2251 | +; GFX942-GISEL-NEXT: v_sub_f32_e32 v0, s0, v2 |
| 2252 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, v1 |
| 2253 | +; GFX942-GISEL-NEXT: v_pk_add_f32 v[2:3], s[2:3], v[2:3] |
| 2254 | +; GFX942-GISEL-NEXT: s_nop 0 |
| 2255 | +; GFX942-GISEL-NEXT: v_subrev_f32_e32 v1, s3, v2 |
| 2256 | +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| 2257 | +; GFX942-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] |
| 2258 | +; GFX942-GISEL-NEXT: s_endpgm |
2102 | 2259 | bb:
|
2103 | 2260 | %i12 = fadd <2 x float> %arg, %arg1
|
2104 | 2261 | %shift8 = shufflevector <2 x float> %i12, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
|
@@ -2251,3 +2408,6 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
2251 | 2408 | declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
2252 | 2409 | declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
2253 | 2410 | declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)
|
| 2411 | +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| 2412 | +; GFX90A-SDAG: {{.*}} |
| 2413 | +; GFX942-SDAG: {{.*}} |
0 commit comments