Skip to content

Commit 5b89aaa

Browse files
authored
[X86] Fold concat(PCMP*(),PCMP*()) -> CMPPS(concat,concat) on AVX1 targets (llvm#95915)
This is a more restricted solution to llvm#82242 (vs the more general llvm#82290 + llvm#84360) whereby, if we're concat'ing PCMPEQ/GT nodes to 256 bits on an AVX1 target, we determine whether the integer values are in bounds to allow them to be converted to FP for a (legal) float comparison. By performing this inside combineConcatVectorOps and working on PCMPEQ/GT nodes and not ICMP, we delay the fold until after more lowering has occurred, which avoids many of the issues where we were getting 'stuck' with CMPPS or unnecessary 256-bit nodes, and we can more easily determine if either of the new concats() will be free. Additionally, this patch requires BOTH comparison operands to be in range; while technically not required, this does help avoid the remaining regressions. It doesn't require that one of the operands is constant, as it didn't seem necessary to include that constraint. I've reused some of the code from llvm#82290, and we may be able to add additional functionality (more CondCode patterns, v4i64/v4f64 handling, 'bitcastable' integers, etc.) in future patches. Fixes llvm#82242
1 parent 66f55a7 commit 5b89aaa

File tree

7 files changed

+1641
-1634
lines changed

7 files changed

+1641
-1634
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55734,6 +55734,28 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
5573455734
return SDValue();
5573555735
}
5573655736

55737+
// Helper to determine if we can convert an integer comparison to a float
55738+
// comparison by casting the operands.
//
// \p VT is the type the comparison would be performed in; its scalar type is
// asserted to be f32. \p CC is the integer condition code and is asserted to
// be SETEQ or SETGT. \p NumSignificantBitsLHS / \p NumSignificantBitsRHS are
// the significant-bit counts of the two comparison operands.
//
// Returns ISD::SINT_TO_FP when both operand bit counts fit within the
// precision of the FP semantics for the scalar type, otherwise std::nullopt.
55739+
static std::optional<unsigned>
55740+
CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,
55741+
unsigned NumSignificantBitsRHS) {
55742+
MVT SVT = VT.getScalarType();
55743+
assert(SVT == MVT::f32 && "Only tested for float so far");
55744+
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(SVT);
55745+
assert((CC == ISD::SETEQ || CC == ISD::SETGT) &&
55746+
"Only PCMPEQ/PCMPGT currently supported");
55747+
55748+
// TODO: Handle bitcastable integers.
55749+
55750+
// For cvt + signed compare we need lhs and rhs to be exactly representable as
55751+
// a fp value.
// Both operands are checked against the same precision; if either needs more
// significant bits than the FP format provides, no cast opcode is returned.
55752+
unsigned FPPrec = APFloat::semanticsPrecision(Sem);
55753+
if (FPPrec >= NumSignificantBitsLHS && FPPrec >= NumSignificantBitsRHS)
55754+
return ISD::SINT_TO_FP;
55755+
55756+
return std::nullopt;
55757+
}
55758+
5573755759
/// Helper that combines an array of subvector ops as if they were the operands
5573855760
/// of an ISD::CONCAT_VECTORS node, but may have come from another source (e.g.
5573955761
/// ISD::INSERT_SUBVECTOR). The ops are assumed to be of the same type.
@@ -56126,11 +56148,50 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5612656148
break;
5612756149
case X86ISD::PCMPEQ:
5612856150
case X86ISD::PCMPGT:
56129-
if (!IsSplat && VT.is256BitVector() && Subtarget.hasInt256() &&
56151+
if (!IsSplat && VT.is256BitVector() &&
56152+
(Subtarget.hasInt256() || VT == MVT::v8i32) &&
5613056153
(IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
56131-
return DAG.getNode(Op0.getOpcode(), DL, VT,
56132-
ConcatSubOperand(VT, Ops, 0),
56133-
ConcatSubOperand(VT, Ops, 1));
56154+
if (Subtarget.hasInt256())
56155+
return DAG.getNode(Op0.getOpcode(), DL, VT,
56156+
ConcatSubOperand(VT, Ops, 0),
56157+
ConcatSubOperand(VT, Ops, 1));
56158+
56159+
// Without AVX2, see if we can cast the values to v8f32 and use fcmp.
56160+
// TODO: Handle v4f64 as well?
56161+
unsigned MaxSigBitsLHS = 0, MaxSigBitsRHS = 0;
56162+
for (unsigned I = 0; I != NumOps; ++I) {
56163+
MaxSigBitsLHS =
56164+
std::max(MaxSigBitsLHS,
56165+
DAG.ComputeMaxSignificantBits(Ops[I].getOperand(0)));
56166+
MaxSigBitsRHS =
56167+
std::max(MaxSigBitsRHS,
56168+
DAG.ComputeMaxSignificantBits(Ops[I].getOperand(1)));
56169+
if (MaxSigBitsLHS == EltSizeInBits && MaxSigBitsRHS == EltSizeInBits)
56170+
break;
56171+
}
56172+
56173+
ISD::CondCode ICC =
56174+
Op0.getOpcode() == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
56175+
ISD::CondCode FCC =
56176+
Op0.getOpcode() == X86ISD::PCMPEQ ? ISD::SETOEQ : ISD::SETOGT;
56177+
56178+
MVT FpSVT = MVT::getFloatingPointVT(EltSizeInBits);
56179+
MVT FpVT = VT.changeVectorElementType(FpSVT);
56180+
56181+
if (std::optional<unsigned> CastOpc =
56182+
CastIntSETCCtoFP(FpVT, ICC, MaxSigBitsLHS, MaxSigBitsRHS)) {
56183+
SDValue LHS = ConcatSubOperand(VT, Ops, 0);
56184+
SDValue RHS = ConcatSubOperand(VT, Ops, 1);
56185+
LHS = DAG.getNode(*CastOpc, DL, FpVT, LHS);
56186+
RHS = DAG.getNode(*CastOpc, DL, FpVT, RHS);
56187+
56188+
bool IsAlwaysSignaling;
56189+
unsigned FSETCC =
56190+
translateX86FSETCC(FCC, LHS, RHS, IsAlwaysSignaling);
56191+
return DAG.getBitcast(
56192+
VT, DAG.getNode(X86ISD::CMPP, DL, FpVT, LHS, RHS,
56193+
DAG.getTargetConstant(FSETCC, DL, MVT::i8)));
56194+
}
5613456195
}
5613556196
break;
5613656197
case ISD::CTPOP:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -256,12 +256,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
256256
; AVX1-NEXT: vmovd %edi, %xmm0
257257
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
258258
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
259-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
260-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
261-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
262-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
263-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
264-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
259+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
260+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
261+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
265262
; AVX1-NEXT: retq
266263
;
267264
; AVX2-LABEL: ext_i8_8i32:
@@ -487,18 +484,12 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
487484
; AVX1-NEXT: vmovd %edi, %xmm0
488485
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
489486
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
490-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
491-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
492-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
493-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
494-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
495-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
496-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
497-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
498-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
499-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
500-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
501-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
487+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
488+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
489+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
490+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
491+
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
492+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
502493
; AVX1-NEXT: retq
503494
;
504495
; AVX2-LABEL: ext_i16_16i32:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
320320
; AVX1-NEXT: vmovd %edi, %xmm0
321321
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
322322
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
323-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
324-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
325-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
326-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
327-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
328-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
323+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
324+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
325+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
329326
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
330327
; AVX1-NEXT: retq
331328
;
@@ -613,20 +610,14 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
613610
; AVX1-NEXT: vmovd %edi, %xmm0
614611
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
615612
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
616-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
617-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
618-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
619-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
620-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
621-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
613+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
614+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
615+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
622616
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
623617
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
624-
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [256,512,1024,2048,4096,8192,16384,32768]
625-
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
626-
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
627-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
628-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
629-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
618+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
619+
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
620+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
630621
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
631622
; AVX1-NEXT: retq
632623
;

llvm/test/CodeGen/X86/cmpf-avx.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,20 @@
22
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
44

5+
; PR82242
56
define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
67
; X86-LABEL: cmp_eq_bitcast:
78
; X86: # %bb.0:
89
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
9-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
10-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
11-
; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
12-
; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
13-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
10+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
11+
; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1412
; X86-NEXT: retl
1513
;
1614
; X64-LABEL: cmp_eq_bitcast:
1715
; X64: # %bb.0:
1816
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
19-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
20-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
21-
; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
22-
; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
23-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
17+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
18+
; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2419
; X64-NEXT: retq
2520
%and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2621
%cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>

0 commit comments

Comments
 (0)