Skip to content

Commit 0bfeede

Browse files
committed
[X86][SSE] Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0)) -> EXTEND_VECTOR_INREG(X)
1 parent bb0078e commit 0bfeede

File tree

2 files changed

+36
-10
lines changed

2 files changed

+36
-10
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6184,6 +6184,22 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
61846184
return DAG.getBitcast(VT, Vec);
61856185
}
61866186

6187+
// Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
// Maps each ANY/ZERO/SIGN *_EXTEND_VECTOR_INREG opcode to the matching plain
// *_EXTEND opcode. A plain *_EXTEND opcode is also accepted and is returned
// unchanged.
6188+
static unsigned getOpcode_EXTEND(unsigned Opcode) {
6189+
switch (Opcode) {
6190+
case ISD::ANY_EXTEND:
6191+
case ISD::ANY_EXTEND_VECTOR_INREG:
6192+
return ISD::ANY_EXTEND;
6193+
case ISD::ZERO_EXTEND:
6194+
case ISD::ZERO_EXTEND_VECTOR_INREG:
6195+
return ISD::ZERO_EXTEND;
6196+
case ISD::SIGN_EXTEND:
6197+
case ISD::SIGN_EXTEND_VECTOR_INREG:
6198+
return ISD::SIGN_EXTEND;
6199+
}
6200+
// Reached only when Opcode is not one of the six extension opcodes above.
llvm_unreachable("Unknown opcode");
6201+
}
6202+
61876203
// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
61886204
static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
61896205
switch (Opcode) {
@@ -49258,6 +49274,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
4925849274
EVT VT = N->getValueType(0);
4925949275
SDValue In = N->getOperand(0);
4926049276
unsigned Opcode = N->getOpcode();
49277+
unsigned InOpcode = In.getOpcode();
4926149278
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4926249279

4926349280
// Try to merge vector loads and extend_inreg to an extload.
@@ -49283,9 +49300,18 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
4928349300
}
4928449301

4928549302
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
49286-
if (Opcode == In.getOpcode())
49303+
if (Opcode == InOpcode)
4928749304
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
4928849305

49306+
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
49307+
// -> EXTEND_VECTOR_INREG(X).
49308+
// TODO: Handle non-zero subvector indices.
49309+
if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
49310+
In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
49311+
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
49312+
In.getValueSizeInBits())
49313+
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
49314+
4928949315
// Attempt to combine as a shuffle.
4929049316
// TODO: General ZERO_EXTEND_VECTOR_INREG support.
4929149317
if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||

llvm/test/CodeGen/X86/min-legal-vector-width.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -976,18 +976,18 @@ define void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal
976976
define void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" {
977977
; CHECK-LABEL: sext_v16i8_v16i64:
978978
; CHECK: # %bb.0:
979-
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
980-
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
981-
; CHECK-NEXT: vpmovsxwq %xmm1, %ymm1
982-
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
983-
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
984-
; CHECK-NEXT: vpmovsxwq %xmm3, %ymm3
985-
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
979+
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1
980+
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
986981
; CHECK-NEXT: vpmovsxwq %xmm2, %ymm2
987-
; CHECK-NEXT: vmovdqa %ymm2, 64(%rdi)
982+
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
983+
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
984+
; CHECK-NEXT: vpmovsxwq %xmm3, %ymm3
985+
; CHECK-NEXT: vpmovsxwq %xmm1, %ymm1
986+
; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
988987
; CHECK-NEXT: vmovdqa %ymm0, (%rdi)
988+
; CHECK-NEXT: vmovdqa %ymm1, 64(%rdi)
989989
; CHECK-NEXT: vmovdqa %ymm3, 96(%rdi)
990-
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdi)
990+
; CHECK-NEXT: vmovdqa %ymm2, 32(%rdi)
991991
; CHECK-NEXT: vzeroupper
992992
; CHECK-NEXT: retq
993993
%a = sext <16 x i8> %x to <16 x i64>

0 commit comments

Comments
 (0)