Skip to content

Commit bb5f53c

Browse files
authored
[DAG] isSplatValue - only treat binop splats with repeated undef elements as undef (llvm#135945)
llvm#135597 did not correctly fix the handling of binops where an undef element comes from only one operand. Reporting only the common undef elements could still incorrectly recognise a splat in cases where the (binop X, undef) fold might actually produce a different value, so to be certain we must ensure that both operands have the same demanded undef elements. Fixes llvm#135917.
1 parent bc03d6c commit bb5f53c

File tree

8 files changed

+568
-197
lines changed

8 files changed

+568
-197
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+6-4
Original file line numberDiff line numberDiff line change
@@ -3002,12 +3002,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
30023002
APInt UndefLHS, UndefRHS;
30033003
SDValue LHS = V.getOperand(0);
30043004
SDValue RHS = V.getOperand(1);
3005-
// Only propagate common undef elts for both operands, otherwise we might
3006-
// fail to handle binop-specific undef handling.
3005+
// Only recognize splats with the same demanded undef elements for both
3006+
// operands, otherwise we might fail to handle binop-specific undef
3007+
// handling.
30073008
// e.g. (and undef, 0) -> 0 etc.
30083009
if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) &&
3009-
isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) {
3010-
UndefElts = UndefLHS & UndefRHS;
3010+
isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1) &&
3011+
(DemandedElts & UndefLHS) == (DemandedElts & UndefRHS)) {
3012+
UndefElts = UndefLHS | UndefRHS;
30113013
return true;
30123014
}
30133015
return false;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll

+2-1
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,8 @@ define void @buggy(i32 %0) #0 {
452452
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
453453
; RV64-NEXT: vmv.v.x v8, a0
454454
; RV64-NEXT: vor.vi v8, v8, 1
455-
; RV64-NEXT: vse32.v v8, (zero)
455+
; RV64-NEXT: vrgather.vi v9, v8, 0
456+
; RV64-NEXT: vse32.v v9, (zero)
456457
; RV64-NEXT: ret
457458
entry:
458459
%mul.us.us.i.3 = shl i32 %0, 1

llvm/test/CodeGen/X86/pr134602.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define i32 @PR134602(i16 %a0) {
1717
; X64-NEXT: movzwl %di, %eax
1818
; X64-NEXT: movd %eax, %xmm0
1919
; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20-
; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,2,2,2,4,5,6,7]
20+
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2121
; X64-NEXT: paddw %xmm0, %xmm1
2222
; X64-NEXT: movdqa %xmm1, %xmm0
2323
; X64-NEXT: psrld $16, %xmm0

llvm/test/CodeGen/X86/pr135917.ll

+13-33
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,26 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefix=SSE4
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefix=AVX2
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s
55
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefix=AVX512
66

77
define i32 @PR135917(i1 %a0) {
8-
; SSE2-LABEL: PR135917:
9-
; SSE2: # %bb.0:
10-
; SSE2-NEXT: movd %edi, %xmm0
11-
; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12-
; SSE2-NEXT: movd %xmm0, %ecx
13-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
14-
; SSE2-NEXT: movd %xmm0, %eax
15-
; SSE2-NEXT: addl %ecx, %eax
16-
; SSE2-NEXT: retq
17-
;
18-
; SSE4-LABEL: PR135917:
19-
; SSE4: # %bb.0:
20-
; SSE4-NEXT: movd %edi, %xmm0
21-
; SSE4-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22-
; SSE4-NEXT: movd %xmm0, %ecx
23-
; SSE4-NEXT: pextrd $1, %xmm0, %eax
24-
; SSE4-NEXT: addl %ecx, %eax
25-
; SSE4-NEXT: retq
26-
;
27-
; AVX2-LABEL: PR135917:
28-
; AVX2: # %bb.0:
29-
; AVX2-NEXT: vmovd %edi, %xmm0
30-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
31-
; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
32-
; AVX2-NEXT: vmovd %xmm0, %ecx
33-
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
34-
; AVX2-NEXT: addl %ecx, %eax
35-
; AVX2-NEXT: retq
8+
; CHECK-LABEL: PR135917:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
11+
; CHECK-NEXT: notl %edi
12+
; CHECK-NEXT: andl $1, %edi
13+
; CHECK-NEXT: leal (%rdi,%rdi), %eax
14+
; CHECK-NEXT: retq
3615
;
3716
; AVX512-LABEL: PR135917:
3817
; AVX512: # %bb.0:
39-
; AVX512-NEXT: andb $1, %dil
40-
; AVX512-NEXT: negb %dil
4118
; AVX512-NEXT: kmovd %edi, %k0
4219
; AVX512-NEXT: knotw %k0, %k0
4320
; AVX512-NEXT: vpmovm2d %k0, %xmm0
21+
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
22+
; AVX512-NEXT: vpmovd2m %xmm0, %k0
23+
; AVX512-NEXT: vpmovm2d %k0, %xmm0
4424
; AVX512-NEXT: vpsrld $31, %xmm0, %xmm0
4525
; AVX512-NEXT: vmovd %xmm0, %ecx
4626
; AVX512-NEXT: vpextrd $1, %xmm0, %eax

llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll

+66-26
Original file line numberDiff line numberDiff line change
@@ -162,42 +162,72 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
162162
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
163163
; SSE2-LABEL: splatvar_funnnel_v2i32:
164164
; SSE2: # %bb.0:
165+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
166+
; SSE2-NEXT: pslld $23, %xmm1
165167
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
166-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
167-
; SSE2-NEXT: psllq %xmm1, %xmm2
168-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
169-
; SSE2-NEXT: psllq %xmm1, %xmm0
170-
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
168+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
169+
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
170+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
171+
; SSE2-NEXT: pmuludq %xmm1, %xmm0
172+
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
173+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
174+
; SSE2-NEXT: pmuludq %xmm2, %xmm1
175+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
176+
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
177+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
178+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
179+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
180+
; SSE2-NEXT: por %xmm3, %xmm0
171181
; SSE2-NEXT: retq
172182
;
173183
; SSE41-LABEL: splatvar_funnnel_v2i32:
174184
; SSE41: # %bb.0:
185+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
186+
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
187+
; SSE41-NEXT: pslld $23, %xmm1
175188
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
176-
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
177-
; SSE41-NEXT: psllq %xmm1, %xmm2
178-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
179-
; SSE41-NEXT: psllq %xmm1, %xmm0
180-
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
189+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
190+
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
191+
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
192+
; SSE41-NEXT: pmuludq %xmm2, %xmm3
193+
; SSE41-NEXT: pmuludq %xmm1, %xmm0
194+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
195+
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
196+
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
197+
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
198+
; SSE41-NEXT: por %xmm1, %xmm0
181199
; SSE41-NEXT: retq
182200
;
183201
; AVX1-LABEL: splatvar_funnnel_v2i32:
184202
; AVX1: # %bb.0:
203+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
204+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
205+
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
185206
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
186-
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
187-
; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
188-
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
189-
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
190-
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
207+
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
208+
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
209+
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
210+
; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
211+
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
212+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
213+
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
214+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
215+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
216+
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
191217
; AVX1-NEXT: retq
192218
;
193219
; AVX2-LABEL: splatvar_funnnel_v2i32:
194220
; AVX2: # %bb.0:
195-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
196-
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
197-
; AVX2-NEXT: vpsllq %xmm1, %xmm2, %xmm2
198-
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
199-
; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0
200-
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
221+
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
222+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
223+
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
224+
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
225+
; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2
226+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
227+
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
228+
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
229+
; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
230+
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
201231
; AVX2-NEXT: retq
202232
;
203233
; AVX512F-LABEL: splatvar_funnnel_v2i32:
@@ -259,12 +289,22 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
259289
;
260290
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
261291
; X86-SSE2: # %bb.0:
292+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
293+
; X86-SSE2-NEXT: pslld $23, %xmm1
262294
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
263-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
264-
; X86-SSE2-NEXT: psllq %xmm1, %xmm2
265-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
266-
; X86-SSE2-NEXT: psllq %xmm1, %xmm0
267-
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
295+
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
296+
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
297+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
298+
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
299+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
300+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
301+
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
302+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
303+
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
304+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
305+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
306+
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
307+
; X86-SSE2-NEXT: por %xmm3, %xmm0
268308
; X86-SSE2-NEXT: retl
269309
%splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
270310
%res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)

0 commit comments

Comments
 (0)