Skip to content

Commit f3645c7

Browse files
committed
[AMDGPU] Use S_BITCMP1_* to replace AND in optimizeCompareInstr
Differential Revision: https://reviews.llvm.org/D109082
1 parent bf77b11 commit f3645c7

File tree

8 files changed: 122 additions and 37 deletions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8008,7 +8008,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80088008
return false;
80098009

80108010
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
8011-
this](int64_t ExpectedValue) -> bool {
8011+
this](int64_t ExpectedValue,
8012+
unsigned SrcSize) -> bool {
80128013
// s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
80138014
// s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
80148015
// s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8019,10 +8020,9 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80198020
// s_cmp_gt_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
80208021
// s_cmp_gt_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
80218022
// s_cmp_lg_u64 (s_and_b64 $src, 1), 0 => s_and_b64 $src, 1
8022-
8023-
// TODO: Fold this into s_bitcmp* if result of an AND is unused.
8024-
// TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
8025-
// process any power of 2.
8023+
//
8024+
// If result of the AND is unused except in the compare:
8025+
// s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
80268026

80278027
if (CmpValue != ExpectedValue)
80288028
return false;
@@ -8035,8 +8035,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80358035
Def->getOpcode() != AMDGPU::S_AND_B64)
80368036
return false;
80378037

8038-
if ((!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 1) &&
8039-
(!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1))
8038+
MachineOperand *SrcOp = &Def->getOperand(1);
8039+
if (SrcOp->isImm() && SrcOp->getImm() == 1)
8040+
SrcOp = &Def->getOperand(2);
8041+
else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
80408042
return false;
80418043

80428044
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
@@ -8050,6 +8052,23 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80508052
SccDef->setIsDead(false);
80518053
CmpInstr.eraseFromParent();
80528054

8055+
if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
8056+
return true;
8057+
8058+
// Replace AND with unused result with a S_BITCMP.
8059+
// TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
8060+
// process any power of 2.
8061+
MachineBasicBlock *MBB = Def->getParent();
8062+
8063+
// TODO: Reverse conditions can use S_BITCMP0_*.
8064+
unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
8065+
: AMDGPU::S_BITCMP1_B64;
8066+
8067+
BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
8068+
.add(*SrcOp)
8069+
.addImm(0);
8070+
Def->eraseFromParent();
8071+
80538072
return true;
80548073
};
80558074

@@ -8060,22 +8079,24 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80608079
case AMDGPU::S_CMP_EQ_I32:
80618080
case AMDGPU::S_CMP_GE_U32:
80628081
case AMDGPU::S_CMP_GE_I32:
8063-
case AMDGPU::S_CMP_EQ_U64:
80648082
case AMDGPU::S_CMPK_EQ_U32:
80658083
case AMDGPU::S_CMPK_EQ_I32:
80668084
case AMDGPU::S_CMPK_GE_U32:
80678085
case AMDGPU::S_CMPK_GE_I32:
8068-
return optimizeCmpAnd(1);
8086+
return optimizeCmpAnd(1, 32);
8087+
case AMDGPU::S_CMP_EQ_U64:
8088+
return optimizeCmpAnd(1, 64);
80698089
case AMDGPU::S_CMP_LG_U32:
80708090
case AMDGPU::S_CMP_LG_I32:
80718091
case AMDGPU::S_CMP_GT_U32:
80728092
case AMDGPU::S_CMP_GT_I32:
8073-
case AMDGPU::S_CMP_LG_U64:
80748093
case AMDGPU::S_CMPK_LG_U32:
80758094
case AMDGPU::S_CMPK_LG_I32:
80768095
case AMDGPU::S_CMPK_GT_U32:
80778096
case AMDGPU::S_CMPK_GT_I32:
8078-
return optimizeCmpAnd(0);
8097+
return optimizeCmpAnd(0, 32);
8098+
case AMDGPU::S_CMP_LG_U64:
8099+
return optimizeCmpAnd(0, 64);
80798100
}
80808101

80818102
return false;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
396396
; GCN-NEXT: v_mov_b32_e32 v0, 0x83
397397
; GCN-NEXT: v_mov_b32_e32 v1, 0x80
398398
; GCN-NEXT: s_waitcnt lgkmcnt(0)
399-
; GCN-NEXT: s_and_b32 s0, 1, s0
399+
; GCN-NEXT: s_bitcmp1_b32 s0, 0
400400
; GCN-NEXT: s_cselect_b64 vcc, -1, 0
401401
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
402402
; GCN-NEXT: flat_store_short v[0:1], v0

llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
174174
; GCN-NEXT: s_add_u32 s0, s0, s9
175175
; GCN-NEXT: s_addc_u32 s1, s1, 0
176176
; GCN-NEXT: s_waitcnt lgkmcnt(0)
177-
; GCN-NEXT: s_and_b32 s4, 1, s4
177+
; GCN-NEXT: s_bitcmp1_b32 s4, 0
178178
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
179179
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
180180
; GCN-NEXT: s_mov_b32 s32, 0
@@ -219,7 +219,7 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
219219
; GCN-NEXT: s_add_u32 s0, s0, s9
220220
; GCN-NEXT: s_addc_u32 s1, s1, 0
221221
; GCN-NEXT: s_waitcnt lgkmcnt(0)
222-
; GCN-NEXT: s_and_b32 s4, 1, s4
222+
; GCN-NEXT: s_bitcmp1_b32 s4, 0
223223
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
224224
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
225225
; GCN-NEXT: s_mov_b32 s32, 0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind re
1616
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x70
1717
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x94
1818

19-
; GCN-DAG: s_and_b32 [[AND_I1:s[0-9]+]], 1, s{{[0-9]+}}
19+
; GCN-DAG: s_bitcmp1_b32 s{{[0-9]+}}, 0
2020

2121
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
2222
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]

llvm/test/CodeGen/AMDGPU/optimize-compare.mir

Lines changed: 82 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,40 @@ body: |
88
; GCN: bb.0:
99
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
1010
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
11+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
12+
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
13+
; GCN: S_BRANCH %bb.1
14+
; GCN: bb.1:
15+
; GCN: successors: %bb.2(0x80000000)
16+
; GCN: bb.2:
17+
; GCN: S_ENDPGM 0
18+
bb.0:
19+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
20+
liveins: $sgpr0, $vgpr0_vgpr1
21+
22+
%0:sreg_32 = COPY $sgpr0
23+
%1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
24+
S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
25+
S_CBRANCH_SCC0 %bb.2, implicit $scc
26+
S_BRANCH %bb.1
27+
28+
bb.1:
29+
successors: %bb.2(0x80000000)
30+
31+
bb.2:
32+
S_ENDPGM 0
33+
34+
...
35+
36+
---
37+
name: and_1_cmp_eq_u32_1_used_and
38+
body: |
39+
; GCN-LABEL: name: and_1_cmp_eq_u32_1_used_and
40+
; GCN: bb.0:
41+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
42+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
1143
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
44+
; GCN: S_NOP 0, implicit [[S_AND_B32_]]
1245
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
1346
; GCN: S_BRANCH %bb.1
1447
; GCN: bb.1:
@@ -21,6 +54,7 @@ body: |
2154
2255
%0:sreg_32 = COPY $sgpr0
2356
%1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
57+
S_NOP 0, implicit %1
2458
S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
2559
S_CBRANCH_SCC0 %bb.2, implicit $scc
2660
S_BRANCH %bb.1
@@ -40,7 +74,7 @@ body: |
4074
; GCN: bb.0:
4175
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
4276
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
43-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
77+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
4478
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
4579
; GCN: S_BRANCH %bb.1
4680
; GCN: bb.1:
@@ -177,7 +211,7 @@ body: |
177211
; GCN: bb.0:
178212
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
179213
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
180-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[COPY]], 1, implicit-def $scc
214+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
181215
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
182216
; GCN: S_BRANCH %bb.1
183217
; GCN: bb.1:
@@ -446,7 +480,7 @@ body: |
446480
; GCN: bb.0:
447481
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
448482
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
449-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
483+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
450484
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
451485
; GCN: S_BRANCH %bb.1
452486
; GCN: bb.1:
@@ -478,7 +512,7 @@ body: |
478512
; GCN: bb.0:
479513
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
480514
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
481-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
515+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
482516
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
483517
; GCN: S_BRANCH %bb.1
484518
; GCN: bb.1:
@@ -510,7 +544,7 @@ body: |
510544
; GCN: bb.0:
511545
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
512546
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
513-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
547+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
514548
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
515549
; GCN: S_BRANCH %bb.1
516550
; GCN: bb.1:
@@ -542,7 +576,7 @@ body: |
542576
; GCN: bb.0:
543577
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
544578
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
545-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
579+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
546580
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
547581
; GCN: S_BRANCH %bb.1
548582
; GCN: bb.1:
@@ -607,7 +641,7 @@ body: |
607641
; GCN: bb.0:
608642
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
609643
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
610-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
644+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
611645
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
612646
; GCN: S_BRANCH %bb.1
613647
; GCN: bb.1:
@@ -639,7 +673,7 @@ body: |
639673
; GCN: bb.0:
640674
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
641675
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
642-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
676+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
643677
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
644678
; GCN: S_BRANCH %bb.1
645679
; GCN: bb.1:
@@ -671,7 +705,7 @@ body: |
671705
; GCN: bb.0:
672706
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
673707
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
674-
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
708+
; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
675709
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
676710
; GCN: S_BRANCH %bb.1
677711
; GCN: bb.1:
@@ -703,7 +737,7 @@ body: |
703737
; GCN: bb.0:
704738
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
705739
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
706-
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
740+
; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
707741
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
708742
; GCN: S_BRANCH %bb.1
709743
; GCN: bb.1:
@@ -735,7 +769,7 @@ body: |
735769
; GCN: bb.0:
736770
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
737771
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
738-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
772+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
739773
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
740774
; GCN: S_BRANCH %bb.1
741775
; GCN: bb.1:
@@ -767,7 +801,7 @@ body: |
767801
; GCN: bb.0:
768802
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
769803
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
770-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
804+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
771805
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
772806
; GCN: S_BRANCH %bb.1
773807
; GCN: bb.1:
@@ -799,7 +833,7 @@ body: |
799833
; GCN: bb.0:
800834
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
801835
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
802-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
836+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
803837
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
804838
; GCN: S_BRANCH %bb.1
805839
; GCN: bb.1:
@@ -831,7 +865,7 @@ body: |
831865
; GCN: bb.0:
832866
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
833867
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
834-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
868+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
835869
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
836870
; GCN: S_BRANCH %bb.1
837871
; GCN: bb.1:
@@ -863,7 +897,7 @@ body: |
863897
; GCN: bb.0:
864898
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
865899
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
866-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
900+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
867901
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
868902
; GCN: S_BRANCH %bb.1
869903
; GCN: bb.1:
@@ -895,7 +929,7 @@ body: |
895929
; GCN: bb.0:
896930
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
897931
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
898-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
932+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
899933
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
900934
; GCN: S_BRANCH %bb.1
901935
; GCN: bb.1:
@@ -927,7 +961,7 @@ body: |
927961
; GCN: bb.0:
928962
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
929963
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
930-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
964+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
931965
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
932966
; GCN: S_BRANCH %bb.1
933967
; GCN: bb.1:
@@ -959,7 +993,7 @@ body: |
959993
; GCN: bb.0:
960994
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
961995
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
962-
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
996+
; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
963997
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
964998
; GCN: S_BRANCH %bb.1
965999
; GCN: bb.1:
@@ -983,3 +1017,33 @@ body: |
9831017
S_ENDPGM 0
9841018
9851019
...
1020+
1021+
---
1022+
name: and_1_cmp_eq_u32_1_imm_src
1023+
body: |
1024+
; GCN-LABEL: name: and_1_cmp_eq_u32_1_imm_src
1025+
; GCN: bb.0:
1026+
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
1027+
; GCN: S_BITCMP1_B32 11, 0, implicit-def $scc
1028+
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
1029+
; GCN: S_BRANCH %bb.1
1030+
; GCN: bb.1:
1031+
; GCN: successors: %bb.2(0x80000000)
1032+
; GCN: bb.2:
1033+
; GCN: S_ENDPGM 0
1034+
bb.0:
1035+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
1036+
liveins: $sgpr0, $vgpr0_vgpr1
1037+
1038+
%0:sreg_32 = S_AND_B32 1, 11, implicit-def dead $scc
1039+
S_CMP_EQ_U32 killed %0:sreg_32, 1, implicit-def $scc
1040+
S_CBRANCH_SCC0 %bb.2, implicit $scc
1041+
S_BRANCH %bb.1
1042+
1043+
bb.1:
1044+
successors: %bb.2(0x80000000)
1045+
1046+
bb.2:
1047+
S_ENDPGM 0
1048+
1049+
...

llvm/test/CodeGen/AMDGPU/select-i1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define amdgpu_kernel void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1
1515

1616
; GCN-LABEL: {{^}}s_minmax_i1:
1717
; GCN: s_load_dword [[LOAD:s[0-9]+]],
18-
; GCN: s_and_b32 [[COND:s[0-9]+]], 1, [[LOAD]]
18+
; GCN: s_bitcmp1_b32 [[LOAD]], 0
1919
; GCN: s_cselect_b64 vcc, -1, 0
2020
; GCN-DAG: s_lshr_b32 [[A:s[0-9]+]], [[LOAD]], 8
2121
; GCN-DAG: s_lshr_b32 [[B:s[0-9]+]], [[LOAD]], 16

llvm/test/CodeGen/AMDGPU/trunc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ define amdgpu_kernel void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a)
9696
; GCN-LABEL: {{^}}s_trunc_i64_to_i1:
9797
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
9898
; VI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4c
99-
; GCN: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
99+
; GCN: s_bitcmp1_b32 s[[SLO]], 0
100100
; SI: s_cselect_b64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], -1, 0
101101
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
102102
; VI: s_cselect_b32 {{s[0-9]+}}, 63, -12

0 commit comments

Comments
 (0)