Skip to content

Commit 3a77522

Browse files
authored
[AArch64][GlobalISel] Improve and expand fcopysign lowering (llvm#71283)
This alters the lowering of G_FCOPYSIGN to support vector types. The general idea is that we just lower it to vector operations using AND/OR with a sign mask, which are now converted to a BIF/BIT/BSL. In the process, the existing AArch64LegalizerInfo::legalizeFCopySign can be removed, relying instead on expanding the scalar versions to vectors, which just needs a small adjustment to allow widening scalars to vectors.
1 parent ccc20b4 commit 3a77522

File tree

7 files changed

+454
-419
lines changed

7 files changed

+454
-419
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5210,6 +5210,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
52105210
case TargetOpcode::G_FSUB:
52115211
case TargetOpcode::G_FMUL:
52125212
case TargetOpcode::G_FDIV:
5213+
case TargetOpcode::G_FCOPYSIGN:
52135214
case TargetOpcode::G_UADDSAT:
52145215
case TargetOpcode::G_USUBSAT:
52155216
case TargetOpcode::G_SADDSAT:

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -269,14 +269,19 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
269269
LLT ResTy = Res.getLLTTy(*getMRI());
270270
LLT Op0Ty = Op0.getLLTTy(*getMRI());
271271

272-
assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
273-
assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
272+
assert(Op0Ty.isVector() && "Non vector type");
273+
assert(((ResTy.isScalar() && (ResTy == Op0Ty.getElementType())) ||
274+
(ResTy.isVector() &&
275+
(ResTy.getElementType() == Op0Ty.getElementType()))) &&
274276
"Different vector element types");
275-
assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
276-
"Op0 has fewer elements");
277+
assert(
278+
(ResTy.isScalar() || (ResTy.getNumElements() < Op0Ty.getNumElements())) &&
279+
"Op0 has fewer elements");
277280

278-
SmallVector<Register, 8> Regs;
279281
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
282+
if (ResTy.isScalar())
283+
return buildCopy(Res, Unmerge.getReg(0));
284+
SmallVector<Register, 8> Regs;
280285
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
281286
Regs.push_back(Unmerge.getReg(i));
282287
return buildMergeLikeInstr(Res, Regs);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 8 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,10 +1157,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
11571157
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
11581158
.legalFor({{s64, s32}, {s64, s64}});
11591159

1160-
// TODO: Custom legalization for vector types.
11611160
// TODO: Custom legalization for mismatched types.
1162-
// TODO: s16 support.
1163-
getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
1161+
getActionDefinitionsBuilder(G_FCOPYSIGN)
1162+
.moreElementsIf(
1163+
[](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1164+
[=](const LegalityQuery &Query) {
1165+
const LLT Ty = Query.Types[0];
1166+
return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1167+
})
1168+
.lower();
11641169

11651170
getActionDefinitionsBuilder(G_FMAD).lower();
11661171

@@ -1217,8 +1222,6 @@ bool AArch64LegalizerInfo::legalizeCustom(
12171222
case TargetOpcode::G_MEMMOVE:
12181223
case TargetOpcode::G_MEMSET:
12191224
return legalizeMemOps(MI, Helper);
1220-
case TargetOpcode::G_FCOPYSIGN:
1221-
return legalizeFCopySign(MI, Helper);
12221225
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
12231226
return legalizeExtractVectorElt(MI, MRI, Helper);
12241227
case TargetOpcode::G_DYN_STACKALLOC:
@@ -1960,66 +1963,6 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
19601963
return false;
19611964
}
19621965

1963-
bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1964-
LegalizerHelper &Helper) const {
1965-
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1966-
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1967-
Register Dst = MI.getOperand(0).getReg();
1968-
LLT DstTy = MRI.getType(Dst);
1969-
assert(DstTy.isScalar() && "Only expected scalars right now!");
1970-
const unsigned DstSize = DstTy.getSizeInBits();
1971-
assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1972-
assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1973-
"Expected homogeneous types!");
1974-
1975-
// We want to materialize a mask with the high bit set.
1976-
uint64_t EltMask;
1977-
LLT VecTy;
1978-
1979-
// TODO: s16 support.
1980-
switch (DstSize) {
1981-
default:
1982-
llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1983-
case 64: {
1984-
// AdvSIMD immediate moves cannot materialize out mask in a single
1985-
// instruction for 64-bit elements. Instead, materialize zero and then
1986-
// negate it.
1987-
EltMask = 0;
1988-
VecTy = LLT::fixed_vector(2, DstTy);
1989-
break;
1990-
}
1991-
case 32:
1992-
EltMask = 0x80000000ULL;
1993-
VecTy = LLT::fixed_vector(4, DstTy);
1994-
break;
1995-
}
1996-
1997-
// Widen In1 and In2 to 128 bits. We want these to eventually become
1998-
// INSERT_SUBREGs.
1999-
auto Undef = MIRBuilder.buildUndef(VecTy);
2000-
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
2001-
auto Ins1 = MIRBuilder.buildInsertVectorElement(
2002-
VecTy, Undef, MI.getOperand(1).getReg(), Zero);
2003-
auto Ins2 = MIRBuilder.buildInsertVectorElement(
2004-
VecTy, Undef, MI.getOperand(2).getReg(), Zero);
2005-
2006-
// Construct the mask.
2007-
auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
2008-
if (DstSize == 64)
2009-
Mask = MIRBuilder.buildFNeg(VecTy, Mask);
2010-
2011-
auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
2012-
2013-
// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
2014-
// want this to eventually become an EXTRACT_SUBREG.
2015-
SmallVector<Register, 2> DstRegs(1, Dst);
2016-
for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
2017-
DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
2018-
MIRBuilder.buildUnmerge(DstRegs, Sel);
2019-
MI.eraseFromParent();
2020-
return true;
2021-
}
2022-
20231966
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
20241967
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
20251968
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
6060
LegalizerHelper &Helper) const;
6161
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
6262
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
63-
bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
6463
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
6564
LegalizerHelper &Helper) const;
6665
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,18 @@ body: |
1313
; CHECK-NEXT: {{ $}}
1414
; CHECK-NEXT: %val:_(s32) = COPY $s0
1515
; CHECK-NEXT: %sign:_(s32) = COPY $s1
16-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
17-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
18-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
19-
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
20-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
21-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
22-
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC1]], [[IVEC]]
23-
; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
16+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
17+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %val(s32), [[DEF]](s32)
18+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %sign(s32), [[DEF]](s32)
19+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
20+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
21+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
22+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
23+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
24+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
25+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
26+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
27+
; CHECK-NEXT: %fcopysign:_(s32) = COPY [[UV]](s32)
2428
; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
2529
; CHECK-NEXT: RET_ReallyLR implicit $s0
2630
%val:_(s32) = COPY $s0
@@ -41,14 +45,18 @@ body: |
4145
; CHECK-NEXT: {{ $}}
4246
; CHECK-NEXT: %val:_(s64) = COPY $d0
4347
; CHECK-NEXT: %sign:_(s64) = COPY $d1
44-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
45-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
46-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
47-
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
48-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
49-
; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
50-
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC1]], [[IVEC]]
51-
; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
49+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %val(s64), [[DEF]](s64)
50+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %sign(s64), [[DEF]](s64)
51+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
52+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
53+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
54+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
55+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
56+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
57+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
58+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[OR]](<2 x s64>)
59+
; CHECK-NEXT: %fcopysign:_(s64) = COPY [[UV]](s64)
5260
; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
5361
; CHECK-NEXT: RET_ReallyLR implicit $d0
5462
%val:_(s64) = COPY $d0

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,8 @@
526526
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
527527
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
528528
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
529-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
530-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
529+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
530+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
531531
# DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
532532
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
533533
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected

0 commit comments

Comments
 (0)