Skip to content

Commit c34819a

Browse files
committed
GlobalISel: Handle G_BUILD_VECTOR in isKnownToBeAPowerOfTwo
1 parent e617cf9 commit c34819a

File tree

2 files changed

+59
-3
lines changed

2 files changed

+59
-3
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,11 +647,32 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
647647

648648
break;
649649
}
650+
case TargetOpcode::G_BUILD_VECTOR: {
651+
// TODO: Probably should have a recursion depth guard since you could have
652+
// bitcasted vector elements.
653+
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
654+
if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB))
655+
return false;
656+
}
657+
658+
return true;
659+
}
660+
case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
661+
// Only handle constants since we would need to know if the number of leading
662+
// zeros is greater than the truncation amount.
663+
const unsigned BitWidth = Ty.getScalarSizeInBits();
664+
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
665+
auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
666+
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
667+
return false;
668+
}
669+
670+
return true;
671+
}
650672
default:
651673
break;
652674
}
653675

654-
// TODO: Are all operands of a build vector constant powers of two?
655676
if (!KB)
656677
return false;
657678

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,44 @@ body: |
143143
; GCN: %var:_(<2 x s16>) = COPY $vgpr0
144144
; GCN: %four:_(s32) = G_CONSTANT i32 4
145145
; GCN: %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four(s32), %four(s32)
146+
; GCN: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
147+
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
148+
; GCN: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD %four_vec, [[BUILD_VECTOR]]
149+
; GCN: %rem:_(<2 x s16>) = G_AND %var, [[ADD]]
150+
; GCN: $vgpr0 = COPY %rem(<2 x s16>)
151+
%var:_(<2 x s16>) = COPY $vgpr0
152+
%shift_amt:_(s32) = COPY $vgpr1
153+
%four:_(s32) = G_CONSTANT i32 4
154+
%four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four, %four
155+
%rem:_(<2 x s16>) = G_UREM %var, %four_vec
156+
$vgpr0 = COPY %rem
157+
...
158+
159+
# The shl is a known power of two, but we do not know if the final
160+
# value is a power of two due to the truncation.
161+
---
162+
name: urem_v2s16_var_nonconst_build_vector_trunc
163+
tracksRegLiveness: true
164+
body: |
165+
bb.0:
166+
liveins: $vgpr0, $vgpr1
167+
168+
; GCN-LABEL: name: urem_v2s16_var_nonconst_build_vector_trunc
169+
; GCN: liveins: $vgpr0, $vgpr1
170+
; GCN: %var:_(<2 x s16>) = COPY $vgpr0
171+
; GCN: %shift_amt:_(<2 x s16>) = COPY $vgpr1
172+
; GCN: %two:_(s32) = G_CONSTANT i32 2
173+
; GCN: %four:_(s32) = G_CONSTANT i32 4
174+
; GCN: %shift:_(s32) = G_SHL %two, %shift_amt(<2 x s16>)
175+
; GCN: %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four(s32), %shift(s32)
146176
; GCN: %rem:_(<2 x s16>) = G_UREM %var, %four_vec
147177
; GCN: $vgpr0 = COPY %rem(<2 x s16>)
148178
%var:_(<2 x s16>) = COPY $vgpr0
149179
%shift_amt:_(<2 x s16>) = COPY $vgpr1
180+
%two:_(s32) = G_CONSTANT i32 2
150181
%four:_(s32) = G_CONSTANT i32 4
151-
%four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four, %four
182+
%shift:_(s32) = G_SHL %two, %shift_amt
183+
%four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four, %shift
152184
%rem:_(<2 x s16>) = G_UREM %var, %four_vec
153185
$vgpr0 = COPY %rem
154186
...
@@ -190,7 +222,10 @@ body: |
190222
; GCN: %pow2_1:_(s32) = G_CONSTANT i32 4096
191223
; GCN: %pow2_2:_(s32) = G_CONSTANT i32 2048
192224
; GCN: %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32)
193-
; GCN: %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
225+
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
226+
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
227+
; GCN: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD %pow2_vec, [[BUILD_VECTOR]]
228+
; GCN: %rem:_(<2 x s32>) = G_AND %var, [[ADD]]
194229
; GCN: $vgpr0_vgpr1 = COPY %rem(<2 x s32>)
195230
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
196231
%pow2_1:_(s32) = G_CONSTANT i32 4096

0 commit comments

Comments
 (0)