Skip to content

Commit 64399da

Browse files
committed
[AMDGPU] gfx1010 lost VOP2 forms of some add/sub
Add legalization of V_ADD_I32, V_SUB_I32, and V_SUBREV_I32.
Differential Revision: (link not captured in this extract)
llvm-svn: 359757
1 parent 5cf8167 commit 64399da

File tree

3 files changed

+88
-19
lines changed

3 files changed

+88
-19
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,6 +3540,33 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
35403540
MI.eraseFromParent();
35413541
return BB;
35423542
}
3543+
case AMDGPU::V_ADD_I32_e32:
3544+
case AMDGPU::V_SUB_I32_e32:
3545+
case AMDGPU::V_SUBREV_I32_e32: {
3546+
// TODO: Define distinct V_*_I32_Pseudo instructions instead.
3547+
const DebugLoc &DL = MI.getDebugLoc();
3548+
unsigned Opc = MI.getOpcode();
3549+
3550+
bool NeedClampOperand = false;
3551+
if (TII->pseudoToMCOpcode(Opc) == -1) {
3552+
Opc = AMDGPU::getVOPe64(Opc);
3553+
NeedClampOperand = true;
3554+
}
3555+
3556+
auto I = BuildMI(*BB, MI, DL, TII->get(Opc), MI.getOperand(0).getReg());
3557+
if (TII->isVOP3(*I)) {
3558+
I.addReg(AMDGPU::VCC, RegState::Define);
3559+
}
3560+
I.add(MI.getOperand(1))
3561+
.add(MI.getOperand(2));
3562+
if (NeedClampOperand)
3563+
I.addImm(0); // clamp bit for e64 encoding
3564+
3565+
TII->legalizeOperands(*I);
3566+
3567+
MI.eraseFromParent();
3568+
return BB;
3569+
}
35433570
default:
35443571
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
35453572
}

llvm/test/CodeGen/AMDGPU/mad.u16.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
4+
5+
; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
6+
7+
; GCN-LABEL: {{^}}mad_u16
8+
; GCN: {{flat|global}}_load_ushort v[[A:[0-9]+]]
9+
; GCN: {{flat|global}}_load_ushort v[[B:[0-9]+]]
10+
; GCN: {{flat|global}}_load_ushort v[[C:[0-9]+]]
11+
; GFX8: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
12+
; GFX9: v_mad_legacy_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
13+
; GFX10: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
14+
; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[R]]
15+
; GCN: s_endpgm
16+
define amdgpu_kernel void @mad_u16(
17+
i16 addrspace(1)* %r,
18+
i16 addrspace(1)* %a,
19+
i16 addrspace(1)* %b,
20+
i16 addrspace(1)* %c) {
21+
entry:
22+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
23+
%a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a, i32 %tid
24+
%b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b, i32 %tid
25+
%c.gep = getelementptr inbounds i16, i16 addrspace(1)* %c, i32 %tid
26+
27+
%a.val = load volatile i16, i16 addrspace(1)* %a.gep
28+
%b.val = load volatile i16, i16 addrspace(1)* %b.gep
29+
%c.val = load volatile i16, i16 addrspace(1)* %c.gep
30+
31+
%m.val = mul i16 %a.val, %b.val
32+
%r.val = add i16 %m.val, %c.val
33+
34+
store i16 %r.val, i16 addrspace(1)* %r
35+
ret void
36+
}
37+
38+
declare i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/min.ll

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
2-
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s
3-
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s
2+
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
3+
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9_10 -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
4+
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX9_10 -check-prefix=GFX8_9_10 -check-prefix=FUNC %s
45
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
56

67
; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
@@ -74,8 +75,9 @@ define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32],
7475

7576
; FIXME: Why vector and sdwa for last element?
7677
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
77-
; GCN: s_load_dword s
78-
; GCN: s_load_dword s
78+
; GCN-DAG: s_load_dwordx2
79+
; GCN-DAG: s_load_dword s
80+
; GCN-DAG: s_load_dword s
7981
; GCN-NOT: _load_
8082

8183
; SI: s_min_i32
@@ -88,10 +90,10 @@ define amdgpu_kernel void @s_test_imin_sle_i8(i8 addrspace(1)* %out, [8 x i32],
8890
; VI: s_min_i32
8991
; VI: v_min_i32_sdwa
9092

91-
; GFX9: v_min_i16
92-
; GFX9: v_min_i16
93-
; GFX9: v_min_i16
94-
; GFX9: v_min_i16
93+
; GFX9_10: v_min_i16
94+
; GFX9_10: v_min_i16
95+
; GFX9_10: v_min_i16
96+
; GFX9_10: v_min_i16
9597

9698
; EG: MIN_INT
9799
; EG: MIN_INT
@@ -120,7 +122,7 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8
120122
; VI: s_min_i32
121123
; VI: s_min_i32
122124

123-
; GFX9: v_pk_min_i16
125+
; GFX9_10: v_pk_min_i16
124126

125127
; EG: MIN_INT
126128
; EG: MIN_INT
@@ -143,8 +145,8 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <
143145
; VI: s_min_i32
144146
; VI: s_min_i32
145147

146-
; GFX9: v_pk_min_i16
147-
; GFX9: v_pk_min_i16
148+
; GFX9_10: v_pk_min_i16
149+
; GFX9_10: v_pk_min_i16
148150

149151
; EG: MIN_INT
150152
; EG: MIN_INT
@@ -177,7 +179,8 @@ define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrs
177179
; FUNC-LABEL: @v_test_imin_slt_i16
178180
; SI: v_min_i32_e32
179181

180-
; GFX89: v_min_i16_e32
182+
; GFX8_9: v_min_i16_e32
183+
; GFX10: v_min_i16_e64
181184

182185
; EG: MIN_INT
183186
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
@@ -293,8 +296,8 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <
293296
; VI: v_min_u16_e32
294297
; VI-NOT: v_min_u16
295298

296-
; GFX9: v_pk_min_u16
297-
; GFX9: v_pk_min_u16
299+
; GFX9_10: v_pk_min_u16
300+
; GFX9_10: v_pk_min_u16
298301

299302
; GCN: s_endpgm
300303

@@ -348,9 +351,10 @@ define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrs
348351
; SI: {{buffer|flat|global}}_load_ubyte
349352
; SI: v_min_u32_e32
350353

351-
; GFX89: {{flat|global}}_load_ubyte
352-
; GFX89: {{flat|global}}_load_ubyte
353-
; GFX89: v_min_u16_e32
354+
; GFX8_9_10: {{flat|global}}_load_ubyte
355+
; GFX8_9_10: {{flat|global}}_load_ubyte
356+
; GFX8_9: v_min_u16_e32
357+
; GFX10: v_min_u16_e64
354358

355359
; EG: MIN_UINT
356360
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
@@ -597,7 +601,7 @@ define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64
597601
; VI: v_min_i16
598602
; VI: v_min_i16
599603

600-
; GFX9: v_pk_min_i16
604+
; GFX9_10: v_pk_min_i16
601605

602606
; EG: MIN_INT
603607
; EG: MIN_INT
@@ -622,7 +626,7 @@ define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <
622626
; VI: v_min_u16
623627
; VI: v_min_u16
624628

625-
; GFX9: v_pk_min_u16
629+
; GFX9_10: v_pk_min_u16
626630

627631
; EG: MIN_UINT
628632
; EG: MIN_UINT

0 commit comments

Comments
 (0)