Skip to content

Commit 9734b22

Browse files
committed
[X86] combineCMP - use widenMaskVector to allow us to handle sub-i8 mask cases when just comparing a bool element against zero
1 parent b397921 commit 9734b22

File tree

4 files changed

+18
-26
lines changed

4 files changed

+18
-26
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53455,7 +53455,8 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
5345553455
return true;
5345653456
}
5345753457

53458-
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
53458+
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
53459+
const X86Subtarget &Subtarget) {
5345953460
// Only handle test patterns.
5346053461
if (!isNullConstant(N->getOperand(1)))
5346153462
return SDValue();
@@ -53493,7 +53494,6 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
5349353494

5349453495
// If we're extracting from an avx512 bool vector and comparing against zero,
5349553496
// then try to just bitcast the vector to an integer to use TEST/BT directly.
53496-
// TODO: Handle v2i1/v4i1 bool vector cases.
5349753497
// (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
5349853498
if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
5349953499
Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
@@ -53502,16 +53502,16 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
5350253502
isNullConstant(Src.getOperand(1)) &&
5350353503
Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
5350453504
SDValue BoolVec = Src.getOperand(0);
53505+
unsigned ShAmt = 0;
53506+
if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
53507+
ShAmt = BoolVec.getConstantOperandVal(1);
53508+
BoolVec = BoolVec.getOperand(0);
53509+
}
53510+
BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
5350553511
EVT VecVT = BoolVec.getValueType();
5350653512
unsigned BitWidth = VecVT.getVectorNumElements();
5350753513
EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
5350853514
if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
53509-
unsigned ShAmt = 0;
53510-
if (BoolVec.getOpcode() == X86ISD::KSHIFTR &&
53511-
BoolVec.getConstantOperandAPInt(1).ult(BitWidth)) {
53512-
ShAmt = BoolVec.getConstantOperandVal(1);
53513-
BoolVec = BoolVec.getOperand(0);
53514-
}
5351553515
APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
5351653516
Op = DAG.getBitcast(BCVT, BoolVec);
5351753517
Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
@@ -55800,7 +55800,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5580055800
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
5580155801
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
5580255802
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
55803-
case X86ISD::CMP: return combineCMP(N, DAG);
55803+
case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
5580455804
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
5580555805
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5580655806
case X86ISD::ADD:

llvm/test/CodeGen/X86/movmsk-cmp.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4351,8 +4351,7 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
43514351
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
43524352
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
43534353
; KNL-NEXT: kmovw %k0, %ecx
4354-
; KNL-NEXT: kmovw %k0, %eax
4355-
; KNL-NEXT: testb $2, %al
4354+
; KNL-NEXT: testb $2, %cl
43564355
; KNL-NEXT: movl $42, %eax
43574356
; KNL-NEXT: movl $99, %edx
43584357
; KNL-NEXT: cmovel %edx, %eax
@@ -4365,8 +4364,7 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
43654364
; SKX: # %bb.0:
43664365
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
43674366
; SKX-NEXT: kmovd %k0, %ecx
4368-
; SKX-NEXT: kmovd %k0, %eax
4369-
; SKX-NEXT: testb $2, %al
4367+
; SKX-NEXT: testb $2, %cl
43704368
; SKX-NEXT: movl $42, %eax
43714369
; SKX-NEXT: movl $99, %edx
43724370
; SKX-NEXT: cmovel %edx, %eax

llvm/test/CodeGen/X86/pr33349.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ target triple = "x86_64-unknown-linux-gnu"
1212
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1313
; KNL-NEXT: kshiftrw $2, %k0, %k1
1414
; KNL-NEXT: kmovw %k1, %eax
15-
; KNL-NEXT: kmovw %k1, %ecx
16-
; KNL-NEXT: testb $1, %cl
15+
; KNL-NEXT: testb $1, %al
1716
; KNL-NEXT: fld1
1817
; KNL-NEXT: fldz
1918
; KNL-NEXT: fld %st(0)
@@ -22,8 +21,7 @@ target triple = "x86_64-unknown-linux-gnu"
2221
; KNL-NEXT: fld %st(1)
2322
; KNL-NEXT: fcmovne %st(3), %st
2423
; KNL-NEXT: kmovw %k0, %eax
25-
; KNL-NEXT: kmovw %k0, %ecx
26-
; KNL-NEXT: testb $1, %cl
24+
; KNL-NEXT: testb $1, %al
2725
; KNL-NEXT: fld %st(2)
2826
; KNL-NEXT: fcmovne %st(4), %st
2927
; KNL-NEXT: testb $2, %al
@@ -46,8 +44,7 @@ target triple = "x86_64-unknown-linux-gnu"
4644
; SKX-NEXT: vpmovd2m %xmm0, %k0
4745
; SKX-NEXT: kshiftrb $2, %k0, %k1
4846
; SKX-NEXT: kmovd %k1, %eax
49-
; SKX-NEXT: kmovd %k1, %ecx
50-
; SKX-NEXT: testb $1, %cl
47+
; SKX-NEXT: testb $1, %al
5148
; SKX-NEXT: fld1
5249
; SKX-NEXT: fldz
5350
; SKX-NEXT: fld %st(0)
@@ -56,8 +53,7 @@ target triple = "x86_64-unknown-linux-gnu"
5653
; SKX-NEXT: fld %st(1)
5754
; SKX-NEXT: fcmovne %st(3), %st
5855
; SKX-NEXT: kmovd %k0, %eax
59-
; SKX-NEXT: kmovd %k0, %ecx
60-
; SKX-NEXT: testb $1, %cl
56+
; SKX-NEXT: testb $1, %al
6157
; SKX-NEXT: fld %st(2)
6258
; SKX-NEXT: fcmovne %st(4), %st
6359
; SKX-NEXT: testb $2, %al

llvm/test/CodeGen/X86/pr34177.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,9 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
4949
; AVX512VL-LABEL: test:
5050
; AVX512VL: # %bb.0:
5151
; AVX512VL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
52-
; AVX512VL-NEXT: kmovd %k0, %eax
5352
; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
54-
; AVX512VL-NEXT: kmovd %k0, %ecx
55-
; AVX512VL-NEXT: testb $2, %cl
53+
; AVX512VL-NEXT: kmovd %k0, %eax
54+
; AVX512VL-NEXT: testb $2, %al
5655
; AVX512VL-NEXT: fld1
5756
; AVX512VL-NEXT: fldz
5857
; AVX512VL-NEXT: fld %st(0)
@@ -61,8 +60,7 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
6160
; AVX512VL-NEXT: fld %st(1)
6261
; AVX512VL-NEXT: fcmovne %st(3), %st
6362
; AVX512VL-NEXT: kmovd %k1, %eax
64-
; AVX512VL-NEXT: kmovd %k1, %ecx
65-
; AVX512VL-NEXT: testb $2, %cl
63+
; AVX512VL-NEXT: testb $2, %al
6664
; AVX512VL-NEXT: fld %st(2)
6765
; AVX512VL-NEXT: fcmovne %st(4), %st
6866
; AVX512VL-NEXT: testb $1, %al

0 commit comments

Comments
 (0)