@@ -5802,6 +5802,71 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5802
5802
return false;
5803
5803
}
5804
5804
5805
+ static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
5806
+ const SDLoc &dl) { // Split Op into a {Lo, Hi} pair of subvectors via extractSubVector.
5807
+ MVT VT = Op.getSimpleValueType();
5808
+ unsigned NumElems = VT.getVectorNumElements();
5809
+ unsigned SizeInBits = VT.getSizeInBits();
5810
+ // Lo starts at element 0 and Hi at element NumElems / 2; each is requested with a width of SizeInBits / 2 bits.
5811
+ SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
5812
+ SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
5813
+
5814
+ return std::make_pair(Lo, Hi);
5815
+ }
5816
+
5817
+ // Split a unary integer op into 2 half sized ops and concatenate the results.
5818
+ static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
5819
+ EVT VT = Op.getValueType();
5820
+
5821
+ // Make sure we only try to split 256/512-bit types to avoid creating
5822
+ // narrow vectors.
5823
+ assert((Op.getOperand(0).getValueType().is256BitVector() ||
5824
+ Op.getOperand(0).getValueType().is512BitVector()) &&
5825
+ (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
5826
+ assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
5827
+ VT.getVectorNumElements() &&
5828
+ "Unexpected VTs!"); // source and result must have the same element count
5829
+
5830
+ SDLoc dl(Op);
5831
+
5832
+ // Extract the Lo/Hi vectors
5833
+ SDValue Lo, Hi;
5834
+ std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);
5835
+ // Re-issue the same opcode on each half using the result types from GetSplitDestVTs, then concatenate into VT.
5836
+ EVT LoVT, HiVT;
5837
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5838
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
5839
+ DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
5840
+ DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
5841
+ }
5842
+
5843
+ /// Break a binary integer operation into 2 half sized ops and then
5844
+ /// concatenate the results back. Both operands must have the result type VT, which must be a 256-bit or 512-bit vector.
5845
+ static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
5846
+ EVT VT = Op.getValueType();
5847
+
5848
+ // Sanity check that all the types match.
5849
+ assert(Op.getOperand(0).getValueType() == VT &&
5850
+ Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
5851
+ assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
5852
+
5853
+ SDLoc dl(Op);
5854
+
5855
+ // Extract the LHS Lo/Hi vectors (LHS1 = Lo, LHS2 = Hi)
5856
+ SDValue LHS1, LHS2;
5857
+ std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
5858
+
5859
+ // Extract the RHS Lo/Hi vectors (RHS1 = Lo, RHS2 = Hi)
5860
+ SDValue RHS1, RHS2;
5861
+ std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
5862
+ // Apply the original opcode to the matching halves (Lo with Lo, Hi with Hi), then concatenate the two results into VT.
5863
+ EVT LoVT, HiVT;
5864
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5865
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
5866
+ DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
5867
+ DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
5868
+ }
5869
+
5805
5870
// Helper for splitting operands of an operation to legal target size and
5806
5871
// apply a function on each part.
5807
5872
// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
@@ -21820,32 +21885,30 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
21820
21885
21821
21886
/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
21822
21887
/// concatenate the result back.
21823
- static SDValue Lower256IntVSETCC (SDValue Op, SelectionDAG &DAG) {
21824
- MVT VT = Op.getSimpleValueType ();
21888
+ static SDValue splitIntVSETCC (SDValue Op, SelectionDAG &DAG) {
21889
+ EVT VT = Op.getValueType ();
21825
21890
21826
- assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
21827
- "Unsupported value type for operation");
21891
+ assert(Op.getOpcode() == ISD::SETCC && "Unsupported operation");
21892
+ assert(Op.getOperand(0).getValueType().isInteger() &&
21893
+ VT == Op.getOperand(0).getValueType() && "Unsupported VTs!"); // result type must equal the integer type of operand 0
21828
21894
21829
- unsigned NumElems = VT.getVectorNumElements();
21830
21895
SDLoc dl(Op);
21831
21896
SDValue CC = Op.getOperand(2); // operand 2 is passed unchanged to both half-sized compares
21832
21897
21833
- // Extract the LHS vectors
21834
- SDValue LHS = Op.getOperand(0);
21835
- SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
21836
- SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
21898
+ // Extract the LHS Lo/Hi vectors
21899
+ SDValue LHS1, LHS2;
21900
+ std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
21837
21901
21838
- // Extract the RHS vectors
21839
- SDValue RHS = Op.getOperand(1);
21840
- SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
21841
- SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
21902
+ // Extract the RHS Lo/Hi vectors
21903
+ SDValue RHS1, RHS2;
21904
+ std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
21842
21905
21843
21906
// Issue the operation on the smaller types and concatenate the result back
21844
- MVT EltVT = VT.getVectorElementType() ;
21845
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2 );
21907
+ EVT LoVT, HiVT ;
21908
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT );
21846
21909
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
21847
- DAG.getNode(Op.getOpcode() , dl, NewVT , LHS1, RHS1, CC),
21848
- DAG.getNode(Op.getOpcode() , dl, NewVT , LHS2, RHS2, CC));
21910
+ DAG.getNode(ISD::SETCC , dl, LoVT , LHS1, RHS1, CC),
21911
+ DAG.getNode(ISD::SETCC , dl, HiVT , LHS2, RHS2, CC));
21849
21912
}
21850
21913
21851
21914
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
@@ -22187,7 +22250,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
22187
22250
22188
22251
// Break 256-bit integer vector compare into smaller ones.
22189
22252
if (VT.is256BitVector() && !Subtarget.hasInt256())
22190
- return Lower256IntVSETCC (Op, DAG);
22253
+ return splitIntVSETCC (Op, DAG);
22191
22254
22192
22255
// If this is a SETNE against the signed minimum value, change it to SETGT.
22193
22256
// If this is a SETNE against the signed maximum value, change it to SETLT.
@@ -25922,43 +25985,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
25922
25985
return DAG.getMergeValues({RetVal, Chain}, DL);
25923
25986
}
25924
25987
25925
- // Split an unary integer op into 2 half sized ops.
25926
- static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
25927
- MVT VT = Op.getSimpleValueType();
25928
- unsigned NumElems = VT.getVectorNumElements();
25929
- unsigned SizeInBits = VT.getSizeInBits();
25930
- MVT EltVT = VT.getVectorElementType();
25931
- SDValue Src = Op.getOperand(0);
25932
- assert(EltVT == Src.getSimpleValueType().getVectorElementType() &&
25933
- "Src and Op should have the same element type!");
25934
-
25935
- // Extract the Lo/Hi vectors
25936
- SDLoc dl(Op);
25937
- SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
25938
- SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
25939
-
25940
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
25941
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
25942
- DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
25943
- DAG.getNode(Op.getOpcode(), dl, NewVT, Hi));
25944
- }
25945
-
25946
- // Decompose 256-bit ops into smaller 128-bit ops.
25947
- static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
25948
- assert(Op.getSimpleValueType().is256BitVector() &&
25949
- Op.getSimpleValueType().isInteger() &&
25950
- "Only handle AVX 256-bit vector integer operation");
25951
- return LowerVectorIntUnary(Op, DAG);
25952
- }
25953
-
25954
- // Decompose 512-bit ops into smaller 256-bit ops.
25955
- static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
25956
- assert(Op.getSimpleValueType().is512BitVector() &&
25957
- Op.getSimpleValueType().isInteger() &&
25958
- "Only handle AVX 512-bit vector integer operation");
25959
- return LowerVectorIntUnary(Op, DAG);
25960
- }
25961
-
25962
25988
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
25963
25989
//
25964
25990
// i8/i16 vector implemented using dword LZCNT vector instruction
@@ -25979,7 +26005,7 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
25979
26005
// Split vector; its Lo and Hi parts will be handled in the next iteration.
25980
26006
if (NumElems > 16 ||
25981
26007
(NumElems == 16 && !Subtarget.canExtendTo512DQ()))
25982
- return LowerVectorIntUnary (Op, DAG);
26008
+ return splitVectorIntUnary (Op, DAG);
25983
26009
25984
26010
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
25985
26011
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
@@ -26089,11 +26115,11 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
26089
26115
26090
26116
// Decompose 256-bit ops into smaller 128-bit ops.
26091
26117
if (VT.is256BitVector() && !Subtarget.hasInt256())
26092
- return Lower256IntUnary (Op, DAG);
26118
+ return splitVectorIntUnary (Op, DAG);
26093
26119
26094
26120
// Decompose 512-bit ops into smaller 256-bit ops.
26095
26121
if (VT.is512BitVector() && !Subtarget.hasBWI())
26096
- return Lower512IntUnary (Op, DAG);
26122
+ return splitVectorIntUnary (Op, DAG);
26097
26123
26098
26124
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
26099
26125
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
@@ -26159,48 +26185,6 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
26159
26185
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
26160
26186
}
26161
26187
26162
- /// Break a binary integer operation into 2 half sized ops and then
26163
- /// concatenate the result back.
26164
- static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
26165
- MVT VT = Op.getSimpleValueType();
26166
- unsigned NumElems = VT.getVectorNumElements();
26167
- unsigned SizeInBits = VT.getSizeInBits();
26168
- SDLoc dl(Op);
26169
-
26170
- // Extract the LHS Lo/Hi vectors
26171
- SDValue LHS = Op.getOperand(0);
26172
- SDValue LHS1 = extractSubVector(LHS, 0, DAG, dl, SizeInBits / 2);
26173
- SDValue LHS2 = extractSubVector(LHS, NumElems / 2, DAG, dl, SizeInBits / 2);
26174
-
26175
- // Extract the RHS Lo/Hi vectors
26176
- SDValue RHS = Op.getOperand(1);
26177
- SDValue RHS1 = extractSubVector(RHS, 0, DAG, dl, SizeInBits / 2);
26178
- SDValue RHS2 = extractSubVector(RHS, NumElems / 2, DAG, dl, SizeInBits / 2);
26179
-
26180
- MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems / 2);
26181
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
26182
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
26183
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
26184
- }
26185
-
26186
- /// Break a 256-bit integer operation into two new 128-bit ones and then
26187
- /// concatenate the result back.
26188
- static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) {
26189
- assert(Op.getSimpleValueType().is256BitVector() &&
26190
- Op.getSimpleValueType().isInteger() &&
26191
- "Unsupported value type for operation");
26192
- return splitVectorIntBinary(Op, DAG);
26193
- }
26194
-
26195
- /// Break a 512-bit integer operation into two new 256-bit ones and then
26196
- /// concatenate the result back.
26197
- static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
26198
- assert(Op.getSimpleValueType().is512BitVector() &&
26199
- Op.getSimpleValueType().isInteger() &&
26200
- "Unsupported value type for operation");
26201
- return splitVectorIntBinary(Op, DAG);
26202
- }
26203
-
26204
26188
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
26205
26189
const X86Subtarget &Subtarget) {
26206
26190
MVT VT = Op.getSimpleValueType();
@@ -26214,7 +26198,7 @@ static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
26214
26198
assert(Op.getSimpleValueType().is256BitVector() &&
26215
26199
Op.getSimpleValueType().isInteger() &&
26216
26200
"Only handle AVX 256-bit vector integer operation");
26217
- return split256IntArith (Op, DAG);
26201
+ return splitVectorIntBinary (Op, DAG);
26218
26202
}
26219
26203
26220
26204
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
@@ -26262,7 +26246,7 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
26262
26246
assert(Op.getSimpleValueType().is256BitVector() &&
26263
26247
Op.getSimpleValueType().isInteger() &&
26264
26248
"Only handle AVX 256-bit vector integer operation");
26265
- return split256IntArith (Op, DAG);
26249
+ return splitVectorIntBinary (Op, DAG);
26266
26250
}
26267
26251
26268
26252
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
@@ -26292,7 +26276,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
26292
26276
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
26293
26277
assert(VT.isInteger() &&
26294
26278
"Only handle AVX 256-bit vector integer operation");
26295
- return Lower256IntUnary (Op, DAG);
26279
+ return splitVectorIntUnary (Op, DAG);
26296
26280
}
26297
26281
26298
26282
// Default to expand.
@@ -26304,7 +26288,7 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
26304
26288
26305
26289
// For AVX1 cases, split to use legal ops (everything but v4i64).
26306
26290
if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
26307
- return split256IntArith (Op, DAG);
26291
+ return splitVectorIntBinary (Op, DAG);
26308
26292
26309
26293
SDLoc DL(Op);
26310
26294
unsigned Opcode = Op.getOpcode();
@@ -26348,7 +26332,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
26348
26332
26349
26333
// Decompose 256-bit ops into 128-bit ops.
26350
26334
if (VT.is256BitVector() && !Subtarget.hasInt256())
26351
- return split256IntArith (Op, DAG);
26335
+ return splitVectorIntBinary (Op, DAG);
26352
26336
26353
26337
SDValue A = Op.getOperand(0);
26354
26338
SDValue B = Op.getOperand(1);
@@ -26494,7 +26478,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
26494
26478
26495
26479
// Decompose 256-bit ops into 128-bit ops.
26496
26480
if (VT.is256BitVector() && !Subtarget.hasInt256())
26497
- return split256IntArith (Op, DAG);
26481
+ return splitVectorIntBinary (Op, DAG);
26498
26482
26499
26483
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
26500
26484
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
@@ -26586,7 +26570,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
26586
26570
// For signed 512-bit vectors, split into 256-bit vectors to allow the
26587
26571
// sign-extension to occur.
26588
26572
if (VT == MVT::v64i8 && IsSigned)
26589
- return split512IntArith (Op, DAG);
26573
+ return splitVectorIntBinary (Op, DAG);
26590
26574
26591
26575
// Signed AVX2 implementation - extend xmm subvectors to ymm.
26592
26576
if (VT == MVT::v32i8 && IsSigned) {
@@ -27560,7 +27544,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
27560
27544
27561
27545
// Decompose 256-bit shifts into 128-bit shifts.
27562
27546
if (VT.is256BitVector())
27563
- return split256IntArith (Op, DAG);
27547
+ return splitVectorIntBinary (Op, DAG);
27564
27548
27565
27549
return SDValue();
27566
27550
}
@@ -27606,7 +27590,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
27606
27590
// XOP implicitly uses modulo rotation amounts.
27607
27591
if (Subtarget.hasXOP()) {
27608
27592
if (VT.is256BitVector())
27609
- return split256IntArith (Op, DAG);
27593
+ return splitVectorIntBinary (Op, DAG);
27610
27594
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
27611
27595
27612
27596
// Attempt to rotate by immediate.
@@ -27622,7 +27606,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
27622
27606
27623
27607
// Split 256-bit integers on pre-AVX2 targets.
27624
27608
if (VT.is256BitVector() && !Subtarget.hasAVX2())
27625
- return split256IntArith (Op, DAG);
27609
+ return splitVectorIntBinary (Op, DAG);
27626
27610
27627
27611
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
27628
27612
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
@@ -28287,11 +28271,11 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
28287
28271
28288
28272
// Decompose 256-bit ops into smaller 128-bit ops.
28289
28273
if (VT.is256BitVector() && !Subtarget.hasInt256())
28290
- return Lower256IntUnary (Op, DAG);
28274
+ return splitVectorIntUnary (Op, DAG);
28291
28275
28292
28276
// Decompose 512-bit ops into smaller 256-bit ops.
28293
28277
if (VT.is512BitVector() && !Subtarget.hasBWI())
28294
- return Lower512IntUnary (Op, DAG);
28278
+ return splitVectorIntUnary (Op, DAG);
28295
28279
28296
28280
// For element types greater than i8, do vXi8 pop counts and a bytesum.
28297
28281
if (VT.getScalarType() != MVT::i8) {
@@ -28335,7 +28319,7 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
28335
28319
28336
28320
// Decompose 256-bit ops into smaller 128-bit ops.
28337
28321
if (VT.is256BitVector())
28338
- return Lower256IntUnary (Op, DAG);
28322
+ return splitVectorIntUnary (Op, DAG);
28339
28323
28340
28324
assert(VT.is128BitVector() &&
28341
28325
"Only 128-bit vector bitreverse lowering supported.");
@@ -28376,7 +28360,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
28376
28360
// lowering.
28377
28361
if (VT == MVT::v8i64 || VT == MVT::v16i32) {
28378
28362
assert(!Subtarget.hasBWI() && "BWI should Expand BITREVERSE");
28379
- return Lower512IntUnary (Op, DAG);
28363
+ return splitVectorIntUnary (Op, DAG);
28380
28364
}
28381
28365
28382
28366
unsigned NumElts = VT.getVectorNumElements();
@@ -28385,7 +28369,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
28385
28369
28386
28370
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
28387
28371
if (VT.is256BitVector() && !Subtarget.hasInt256())
28388
- return Lower256IntUnary (Op, DAG);
28372
+ return splitVectorIntUnary (Op, DAG);
28389
28373
28390
28374
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
28391
28375
// two nibbles and a PSHUFB lookup to find the bitreverse of each
@@ -47137,7 +47121,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
47137
47121
if (isConcatenatedNot(InVecBC.getOperand(0)) ||
47138
47122
isConcatenatedNot(InVecBC.getOperand(1))) {
47139
47123
// extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
47140
- SDValue Concat = split256IntArith (InVecBC, DAG);
47124
+ SDValue Concat = splitVectorIntBinary (InVecBC, DAG);
47141
47125
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
47142
47126
DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
47143
47127
}
0 commit comments