Skip to content

Commit 6a1ad76

Browse files
committed
[X86] Don't return true from isTruncateFree for vectors
Also fix some cost tables for vXi1 types to match the cost entries for the types they will be promoted to. Differential Revision: https://reviews.llvm.org/D79045
1 parent ac2635e commit 6a1ad76

File tree

7 files changed

+133
-79
lines changed

7 files changed

+133
-79
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8019,7 +8019,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
80198019
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
80208020
// sra (add (shl X, N1C), AddC), N1C -->
80218021
// sext (add (trunc X to (width - N1C)), AddC')
8022-
if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8022+
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
80238023
N0.getOperand(0).getOpcode() == ISD::SHL &&
80248024
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
80258025
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
@@ -8036,7 +8036,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
80368036
// implementation and/or target-specific overrides (because
80378037
// non-simple types likely require masking when legalized), but that
80388038
// restriction may conflict with other transforms.
8039-
if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
8039+
if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8040+
TLI.isTruncateFree(VT, TruncVT)) {
80408041
SDLoc DL(N);
80418042
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
80428043
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30597,12 +30597,7 @@ bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
3059730597
}
3059830598

3059930599
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
30600-
if (!VT1.isInteger() || !VT2.isInteger())
30601-
return false;
30602-
// Truncate to mask registers aren't free.
30603-
// TODO: No vector truncates are free.
30604-
if (Subtarget.hasAVX512() && VT2.isVector() &&
30605-
VT2.getVectorElementType() == MVT::i1)
30600+
if (!VT1.isScalarInteger() || !VT2.isScalarInteger())
3060630601
return false;
3060730602
unsigned NumBits1 = VT1.getSizeInBits();
3060830603
unsigned NumBits2 = VT2.getSizeInBits();

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
16951695
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
16961696
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
16971697

1698+
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
1699+
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
1700+
16981701
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
16991702
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
17001703
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
@@ -1726,6 +1729,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
17261729
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
17271730
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
17281731

1732+
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 },
1733+
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 },
1734+
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 },
1735+
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
1736+
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
1737+
17291738
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
17301739
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
17311740
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },

llvm/test/Analysis/CostModel/X86/cast.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define i32 @add(i32 %arg) {
2525
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
2626
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <8 x i1> undef to <8 x i32>
2727
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = sext <8 x i1> undef to <8 x i32>
28-
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
28+
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
2929
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
3030
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
3131
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -36,7 +36,7 @@ define i32 @add(i32 %arg) {
3636
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
3737
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <8 x i1> undef to <8 x i32>
3838
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <8 x i1> undef to <8 x i32>
39-
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
39+
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
4040
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
4141
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
4242
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef

llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -523,23 +523,23 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" {
523523
define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
524524
; AVX-LABEL: 'trunc_vXi1'
525525
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
526-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
527-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
528-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
529-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
530-
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
526+
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
527+
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
528+
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
529+
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
530+
; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
531531
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
532532
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
533-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
534-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
535-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
536-
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
533+
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
534+
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
535+
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
536+
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
537537
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
538538
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
539539
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
540-
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
541-
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
542-
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
540+
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
541+
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
542+
; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
543543
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
544544
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
545545
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
@@ -551,7 +551,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
551551
; AVX512VL256-LABEL: 'trunc_vXi1'
552552
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
553553
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
554-
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
554+
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
555555
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
556556
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
557557
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
@@ -579,9 +579,9 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
579579
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
580580
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
581581
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
582-
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
583-
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
584-
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
582+
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
583+
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
584+
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
585585
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
586586
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
587587
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
@@ -605,7 +605,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
605605
; SKX256-LABEL: 'trunc_vXi1'
606606
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
607607
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
608-
; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
608+
; SKX256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
609609
; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
610610
; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
611611
; SKX256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
@@ -633,9 +633,9 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
633633
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
634634
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
635635
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
636-
; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
637-
; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
638-
; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
636+
; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
637+
; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
638+
; SKX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
639639
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
640640
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
641641
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>

0 commit comments

Comments
 (0)