@@ -114,8 +114,8 @@ class X86InstructionSelector : public InstructionSelector {
114
114
bool materializeFP (MachineInstr &I, MachineRegisterInfo &MRI,
115
115
MachineFunction &MF) const ;
116
116
bool selectImplicitDefOrPHI (MachineInstr &I, MachineRegisterInfo &MRI) const ;
117
- bool selectDivRem (MachineInstr &I, MachineRegisterInfo &MRI,
118
- MachineFunction &MF) const ;
117
+ bool selectMulDivRem (MachineInstr &I, MachineRegisterInfo &MRI,
118
+ MachineFunction &MF) const ;
119
119
bool selectIntrinsicWSideEffects (MachineInstr &I, MachineRegisterInfo &MRI,
120
120
MachineFunction &MF) const ;
121
121
@@ -421,11 +421,14 @@ bool X86InstructionSelector::select(MachineInstr &I) {
421
421
case TargetOpcode::G_IMPLICIT_DEF:
422
422
case TargetOpcode::G_PHI:
423
423
return selectImplicitDefOrPHI (I, MRI);
424
+ case TargetOpcode::G_MUL:
425
+ case TargetOpcode::G_SMULH:
426
+ case TargetOpcode::G_UMULH:
424
427
case TargetOpcode::G_SDIV:
425
428
case TargetOpcode::G_UDIV:
426
429
case TargetOpcode::G_SREM:
427
430
case TargetOpcode::G_UREM:
428
- return selectDivRem (I, MRI, MF);
431
+ return selectMulDivRem (I, MRI, MF);
429
432
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
430
433
return selectIntrinsicWSideEffects (I, MRI, MF);
431
434
}
@@ -1558,11 +1561,14 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
1558
1561
return true ;
1559
1562
}
1560
1563
1561
- bool X86InstructionSelector::selectDivRem (MachineInstr &I,
1562
- MachineRegisterInfo &MRI,
1563
- MachineFunction &MF) const {
1564
- // The implementation of this function is taken from X86FastISel.
1565
- assert ((I.getOpcode () == TargetOpcode::G_SDIV ||
1564
+ bool X86InstructionSelector::selectMulDivRem (MachineInstr &I,
1565
+ MachineRegisterInfo &MRI,
1566
+ MachineFunction &MF) const {
1567
+ // The implementation of this function is adapted from X86FastISel.
1568
+ assert ((I.getOpcode () == TargetOpcode::G_MUL ||
1569
+ I.getOpcode () == TargetOpcode::G_SMULH ||
1570
+ I.getOpcode () == TargetOpcode::G_UMULH ||
1571
+ I.getOpcode () == TargetOpcode::G_SDIV ||
1566
1572
I.getOpcode () == TargetOpcode::G_SREM ||
1567
1573
I.getOpcode () == TargetOpcode::G_UDIV ||
1568
1574
I.getOpcode () == TargetOpcode::G_UREM) &&
@@ -1581,10 +1587,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1581
1587
return false ;
1582
1588
1583
1589
const static unsigned NumTypes = 4 ; // i8, i16, i32, i64
1584
- const static unsigned NumOps = 4 ; // SDiv, SRem, UDiv, URem
1590
+ const static unsigned NumOps = 7 ; // SDiv, SRem, UDiv, URem, Mul, SMulH, UMulH (ResultTable index order)
1585
1591
const static bool S = true ; // IsSigned
1586
1592
const static bool U = false ; // !IsSigned
1587
1593
const static unsigned Copy = TargetOpcode::COPY;
1594
+
1588
1595
// For the X86 IDIV instruction, in most cases the dividend
1589
1596
// (numerator) must be in a specific register pair highreg:lowreg,
1590
1597
// producing the quotient in lowreg and the remainder in highreg.
@@ -1593,19 +1600,19 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1593
1600
// exception is i8, where the dividend is defined as a single register rather
1594
1601
// than a register pair, and we therefore directly sign-extend the dividend
1595
1602
// into lowreg, instead of copying, and ignore the highreg.
1596
- const static struct DivRemEntry {
1603
+ const static struct MulDivRemEntry {
1597
1604
// The following portion depends only on the data type.
1598
1605
unsigned SizeInBits;
1599
1606
unsigned LowInReg; // low part of the register pair
1600
1607
unsigned HighInReg; // high part of the register pair
1601
1608
// The following portion depends on both the data type and the operation.
1602
- struct DivRemResult {
1603
- unsigned OpDivRem ; // The specific DIV/IDIV opcode to use.
1609
+ struct MulDivRemResult {
1610
+ unsigned OpMulDivRem ; // The specific MUL/IMUL/DIV/IDIV opcode to use.
1604
1611
unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1605
1612
// highreg, or copying a zero into highreg; 0 means no such step (i8 rows).
1606
1613
unsigned OpCopy; // Opcode for copying the first source operand into lowreg, or
1607
1614
// zero/sign-extending it into lowreg for i8.
1608
- unsigned DivRemResultReg; // Register containing the desired result.
1615
+ unsigned ResultReg; // Register the desired result is copied out of after the operation.
1609
1616
bool IsOpSigned; // Whether to use signed or unsigned form.
1610
1617
} ResultTable[NumOps];
1611
1618
} OpTable[NumTypes] = {
@@ -1617,25 +1624,34 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1617
1624
{X86::IDIV8r, 0 , X86::MOVSX16rr8, X86::AH, S}, // SRem
1618
1625
{X86::DIV8r, 0 , X86::MOVZX16rr8, X86::AL, U}, // UDiv
1619
1626
{X86::DIV8r, 0 , X86::MOVZX16rr8, X86::AH, U}, // URem
1627
+ {X86::IMUL8r, 0 , X86::MOVSX16rr8, X86::AL, S}, // Mul
1628
+ {X86::IMUL8r, 0 , X86::MOVSX16rr8, X86::AH, S}, // SMulH
1629
+ {X86::MUL8r, 0 , X86::MOVZX16rr8, X86::AH, U}, // UMulH
1620
1630
}}, // i8
1621
1631
{16 ,
1622
1632
X86::AX,
1623
1633
X86::DX,
1624
1634
{
1625
- {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
1626
- {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
1627
- {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
1628
- {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
1629
- }}, // i16
1635
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
1636
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
1637
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
1638
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
1639
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
1640
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
1641
+ {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U}, // UMulH
1642
+ }}, // i16
1630
1643
{32 ,
1631
1644
X86::EAX,
1632
1645
X86::EDX,
1633
1646
{
1634
- {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
1635
- {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
1636
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
1637
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
1638
- }}, // i32
1647
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
1648
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
1649
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
1650
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
1651
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
1652
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
1653
+ {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U}, // UMulH
1654
+ }}, // i32
1639
1655
{64 ,
1640
1656
X86::RAX,
1641
1657
X86::RDX,
@@ -1644,10 +1660,13 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1644
1660
{X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
1645
1661
{X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
1646
1662
{X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
1647
- }}, // i64
1663
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
1664
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
1665
+ {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U}, // UMulH
1666
+ }}, // i64
1648
1667
};
1649
1668
1650
- auto OpEntryIt = llvm::find_if (OpTable, [RegTy](const DivRemEntry &El) {
1669
+ auto OpEntryIt = llvm::find_if (OpTable, [RegTy](const MulDivRemEntry &El) {
1651
1670
return El.SizeInBits == RegTy.getSizeInBits ();
1652
1671
});
1653
1672
if (OpEntryIt == std::end (OpTable))
@@ -1656,7 +1675,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1656
1675
unsigned OpIndex;
1657
1676
switch (I.getOpcode ()) {
1658
1677
default :
1659
- llvm_unreachable (" Unexpected div/rem opcode" );
1678
+ llvm_unreachable (" Unexpected mul/ div/rem opcode" );
1660
1679
case TargetOpcode::G_SDIV:
1661
1680
OpIndex = 0 ;
1662
1681
break ;
@@ -1669,10 +1688,20 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1669
1688
case TargetOpcode::G_UREM:
1670
1689
OpIndex = 3 ;
1671
1690
break ;
1691
+ case TargetOpcode::G_MUL:
1692
+ OpIndex = 4 ;
1693
+ break ;
1694
+ case TargetOpcode::G_SMULH:
1695
+ OpIndex = 5 ;
1696
+ break ;
1697
+ case TargetOpcode::G_UMULH:
1698
+ OpIndex = 6 ;
1699
+ break ;
1672
1700
}
1673
1701
1674
- const DivRemEntry &TypeEntry = *OpEntryIt;
1675
- const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable [OpIndex];
1702
+ const MulDivRemEntry &TypeEntry = *OpEntryIt;
1703
+ const MulDivRemEntry::MulDivRemResult &OpEntry =
1704
+ TypeEntry.ResultTable [OpIndex];
1676
1705
1677
1706
const TargetRegisterClass *RegRC = getRegClass (RegTy, *RegRB);
1678
1707
if (!RBI.constrainGenericRegister (Op1Reg, *RegRC, MRI) ||
@@ -1687,6 +1716,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1687
1716
BuildMI (*I.getParent (), I, I.getDebugLoc (), TII.get (OpEntry.OpCopy ),
1688
1717
TypeEntry.LowInReg )
1689
1718
.addReg (Op1Reg);
1719
+
1690
1720
// Zero-extend or sign-extend into high-order input register.
1691
1721
if (OpEntry.OpSignExtend ) {
1692
1722
if (OpEntry.IsOpSigned )
@@ -1717,9 +1747,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1717
1747
}
1718
1748
}
1719
1749
}
1720
- // Generate the DIV/IDIV instruction.
1721
- BuildMI (*I.getParent (), I, I.getDebugLoc (), TII.get (OpEntry.OpDivRem ))
1750
+
1751
+ // Generate the DIV/IDIV/MUL/IMUL instruction.
1752
+ BuildMI (*I.getParent (), I, I.getDebugLoc (), TII.get (OpEntry.OpMulDivRem ))
1722
1753
.addReg (Op2Reg);
1754
+
1723
1755
// For i8 remainder, we can't reference ah directly, as we'll end
1724
1756
// up with bogus copies like %r9b = COPY %ah. Reference ax
1725
1757
// instead to prevent ah references in a rex instruction.
@@ -1728,7 +1760,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1728
1760
// won't generate explicit references to the GR8_NOREX registers. If
1729
1761
// the allocator and/or the backend get enhanced to be more robust in
1730
1762
// that regard, this can be, and should be, removed.
1731
- if (OpEntry.DivRemResultReg == X86::AH && STI.is64Bit ()) {
1763
+ if (OpEntry.ResultReg == X86::AH && STI.is64Bit ()) {
1732
1764
Register SourceSuperReg = MRI.createVirtualRegister (&X86::GR16RegClass);
1733
1765
Register ResultSuperReg = MRI.createVirtualRegister (&X86::GR16RegClass);
1734
1766
BuildMI (*I.getParent (), I, I.getDebugLoc (), TII.get (Copy), SourceSuperReg)
@@ -1750,9 +1782,10 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
1750
1782
} else {
1751
1783
BuildMI (*I.getParent (), I, I.getDebugLoc (), TII.get (TargetOpcode::COPY),
1752
1784
DstReg)
1753
- .addReg (OpEntry.DivRemResultReg );
1785
+ .addReg (OpEntry.ResultReg );
1754
1786
}
1755
1787
I.eraseFromParent ();
1788
+
1756
1789
return true ;
1757
1790
}
1758
1791
0 commit comments