47 #include "llvm/IR/IntrinsicsPowerPC.h"
71 #define DEBUG_TYPE "ppc-codegen"
74 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
76 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
78 "Number of sign extensions for compare inputs added.");
80 "Number of zero extensions for compare inputs added.");
82 "Number of logical ops on i1 values calculated in GPR.");
84 "Number of compares not eliminated as they have non-extending uses.");
86 "Number of compares lowered to setb.");
94 cl::desc(
"use aggressive ppc isel for bit permutations"),
97 "ppc-bit-perm-rewriter-stress-rotates",
98 cl::desc(
"stress rotate selection in aggressive ppc isel for "
103 "ppc-use-branch-hint",
cl::init(
true),
104 cl::desc(
"Enable static hinting of branches on ppc"),
109 cl::desc(
"Enable tls optimization peephole"),
118 cl::desc(
"Specify the types of comparisons to emit GPR-only code for."),
124 "Only comparisons where inputs don't need [sz]ext."),
127 "Only i32 comparisons with zext result."),
129 "Only i64 comparisons with zext result."),
132 "Only i32 comparisons with sext result."),
134 "Only i64 comparisons with sext result.")));
170 void PreprocessISelDAG()
override;
171 void PostprocessISelDAG()
override;
176 return CurDAG->getTargetConstant(
Imm, dl,
MVT::i16);
182 return CurDAG->getTargetConstant(
Imm, dl,
MVT::i32);
188 return CurDAG->getTargetConstant(
Imm, dl,
MVT::i64);
193 return CurDAG->getTargetConstant(
194 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
199 static bool isRotateAndMask(
SDNode *
N,
unsigned Mask,
bool isShiftMask,
200 unsigned &SH,
unsigned &MB,
unsigned &ME);
204 SDNode *getGlobalBaseReg();
212 bool tryBitfieldInsert(
SDNode *
N);
213 bool tryBitPermutation(
SDNode *
N);
214 bool tryIntCompareInGPR(
SDNode *
N);
246 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
254 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
262 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
270 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
277 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
284 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
292 return PPCLowering->SelectForceXFormMode(
N, Disp,
Base, *CurDAG) ==
303 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
None);
313 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
324 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
331 return PPCLowering->SelectAddressRegRegOnly(
N,
Base,
Index, *CurDAG);
340 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
None);
347 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
Align(4));
354 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
362 return PPCLowering->SelectAddressRegImm34(
N, Disp,
Base, *CurDAG);
372 return PPCLowering->SelectAddressPCRel(
N,
Base);
380 bool SelectInlineAsmMemoryOperand(
const SDValue &
Op,
381 unsigned ConstraintID,
382 std::vector<SDValue> &OutOps)
override {
383 switch(ConstraintID) {
385 errs() <<
"ConstraintID: " << ConstraintID <<
"\n";
400 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
401 dl,
Op.getValueType(),
404 OutOps.push_back(NewOp);
411 return "PowerPC DAG->DAG Pattern Instruction Selection";
415 #include "PPCGenDAGISel.inc"
419 bool tryFoldSWTestBRCC(
SDNode *
N);
420 bool tryAsSingleRLDICL(
SDNode *
N);
421 bool tryAsSingleRLDICR(
SDNode *
N);
422 bool tryAsSingleRLWINM(
SDNode *
N);
423 bool tryAsSingleRLWINM8(
SDNode *
N);
424 bool tryAsSingleRLWIMI(
SDNode *
N);
425 bool tryAsPairOfRLDICL(
SDNode *
N);
426 bool tryAsSingleRLDIMI(
SDNode *
N);
428 void PeepholePPC64();
429 void PeepholePPC64ZExt();
430 void PeepholeCROps();
435 bool AllUsersSelectZero(
SDNode *
N);
436 void SwapAllSelectUsers(
SDNode *
N);
438 bool isOffsetMultipleOf(
SDNode *
N,
unsigned Val)
const;
447 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
456 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) ==
MVT::i32) {
467 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
475 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
490 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
496 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
518 "GlobalVariables with an alignment requirement stricter than TOC entry "
519 "size not supported by the toc data transformation.");
523 assert(GVType->
isSized() &&
"A GlobalVariable's size must be known to be "
524 "supported by the toc data transformation.");
528 "supported by the toc data transformation.");
532 "supported by the toc data transformation.");
536 "supported by the toc data transformation.");
539 "A GlobalVariable with size larger than a TOC entry is not currently "
540 "supported by the toc data transformation.");
544 "currently supported by the toc data transformation.");
547 "Tentative definitions cannot have the mapping class XMC_TD.");
556 Imm = cast<ConstantSDNode>(
N)->getZExtValue();
566 Imm = cast<ConstantSDNode>(
N)->getZExtValue();
587 assert(isa<BasicBlockSDNode>(DestMBB));
621 <<
"::" <<
BB->getName() <<
"'\n"
622 <<
" -> " << TBB->
getName() <<
": " << TProb <<
"\n"
623 <<
" -> " << FBB->
getName() <<
": " << FProb <<
"\n");
639 return N->getOpcode() == Opc
645 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
646 SDValue TFI = CurDAG->getTargetFrameIndex(FI,
N->getValueType(0));
647 unsigned Opc =
N->getValueType(0) ==
MVT::i32 ? PPC::ADDI : PPC::ADDI8;
649 CurDAG->SelectNodeTo(SN, Opc,
N->getValueType(0), TFI,
650 getSmallIPtrImm(Offset, dl));
652 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), TFI,
653 getSmallIPtrImm(Offset, dl)));
656 bool PPCDAGToDAGISel::isRotateAndMask(
SDNode *
N,
unsigned Mask,
657 bool isShiftMask,
unsigned &SH,
658 unsigned &MB,
unsigned &ME) {
665 unsigned Indeterminant = ~0;
666 unsigned Opcode =
N->getOpcode();
667 if (
N->getNumOperands() != 2 ||
675 Indeterminant = ~(0xFFFFFFFFu <<
Shift);
680 Indeterminant = ~(0xFFFFFFFFu >>
Shift);
690 if (
Mask && !(
Mask & Indeterminant)) {
709 EVT MemVT =
ST->getMemoryVT();
710 EVT RegVT =
ST->getValue().getValueType();
717 Opcode = (RegVT ==
MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
721 Opcode = (RegVT ==
MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
725 Opcode = (RegVT ==
MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
729 Opcode = PPC::STDXTLS;
737 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
738 transferMemOperands(
ST, MN);
754 EVT MemVT =
LD->getMemoryVT();
755 EVT RegVT =
LD->getValueType(0);
761 Opcode = (RegVT ==
MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
765 Opcode = (RegVT ==
MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
769 Opcode = (RegVT ==
MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
773 Opcode = PPC::LDXTLS;
780 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
781 transferMemOperands(
LD, MN);
788 bool PPCDAGToDAGISel::tryBitfieldInsert(
SDNode *
N) {
793 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
794 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
799 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
802 unsigned Value, SH = 0;
803 TargetMask = ~TargetMask;
804 InsertMask = ~InsertMask;
852 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
854 ReplaceNode(
N, CurDAG->getMachineNode(PPC::RLWIMI, dl,
MVT::i32, Ops));
862 unsigned MaxTruncation = 0;
869 Use->isMachineOpcode() ?
Use->getMachineOpcode() :
Use->getOpcode();
873 if (
Use->isMachineOpcode())
876 std::max(MaxTruncation, (
unsigned)
Use->getValueType(0).getSizeInBits());
879 if (
Use->isMachineOpcode())
885 MaxTruncation =
std::max(MaxTruncation, MemVTSize);
894 MaxTruncation =
std::max(MaxTruncation, 32u);
902 MaxTruncation =
std::max(MaxTruncation, 16u);
910 MaxTruncation =
std::max(MaxTruncation, 8u);
914 return MaxTruncation;
920 unsigned HiTZ = countTrailingZeros<uint32_t>(
Hi_32(
Imm));
921 unsigned LoLZ = countLeadingZeros<uint32_t>(
Lo_32(
Imm));
922 if ((HiTZ + LoLZ) >= Num)
930 unsigned TZ = countTrailingZeros<uint64_t>(
Imm);
931 unsigned LZ = countLeadingZeros<uint64_t>(
Imm);
932 unsigned TO = countTrailingOnes<uint64_t>(
Imm);
933 unsigned LO = countLeadingOnes<uint64_t>(
Imm);
939 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
953 if (TZ > 15 && (LZ > 32 ||
LO > 32))
955 getI32Imm((
Imm >> 16) & 0xffff));
959 assert(LZ < 64 &&
"Unexpected leading zeros here.");
961 unsigned FO = countLeadingOnes<uint64_t>(
Imm << LZ);
966 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
969 getI32Imm(
Imm & 0xffff));
977 if ((LZ + FO + TZ) > 48) {
979 getI32Imm((
Imm >> TZ) & 0xffff));
981 getI32Imm(TZ), getI32Imm(LZ));
998 if ((LZ + TO) > 48) {
1002 assert(LZ <= 32 &&
"Unexpected shift value.");
1004 getI32Imm((
Imm >> (48 - LZ) & 0xffff)));
1006 getI32Imm(48 - LZ), getI32Imm(LZ));
1024 if ((LZ + FO + TO) > 48) {
1026 getI32Imm((
Imm >> TO) & 0xffff));
1028 getI32Imm(TO), getI32Imm(LZ));
1034 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1036 getI32Imm(Lo32 & 0xffff));
1038 getI32Imm(Lo32 >> 16));
1062 getI32Imm(RotImm & 0xffff));
1064 getI32Imm(
Shift), getI32Imm(0));
1076 if ((LZ + FO + TZ) > 32) {
1078 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1081 getI32Imm((
Imm >> TZ) & 0xffff));
1083 getI32Imm(TZ), getI32Imm(LZ));
1090 if ((LZ + TO) > 32) {
1094 assert(LZ <= 32 &&
"Unexpected shift value.");
1096 getI32Imm((
Imm >> (48 - LZ)) & 0xffff));
1098 getI32Imm((
Imm >> (32 - LZ)) & 0xffff));
1100 getI32Imm(32 - LZ), getI32Imm(LZ));
1108 if ((LZ + FO + TO) > 32) {
1110 getI32Imm((
Imm >> (TO + 16)) & 0xffff));
1112 getI32Imm((
Imm >> TO) & 0xffff));
1114 getI32Imm(TO), getI32Imm(LZ));
1119 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1120 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1123 getI32Imm(Lo32 & 0xffff));
1139 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1140 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1143 getI32Imm(RotImm & 0xffff));
1145 getI32Imm(
Shift), getI32Imm(0));
1159 unsigned TZ = countTrailingZeros<uint64_t>(
Imm);
1160 unsigned LZ = countLeadingZeros<uint64_t>(
Imm);
1161 unsigned TO = countTrailingOnes<uint64_t>(
Imm);
1162 unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (
Imm << LZ));
1166 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
1185 SDNode *Result =
nullptr;
1192 if ((LZ + FO + TZ) > 30) {
1198 getI32Imm(TZ), getI32Imm(LZ));
1214 if ((LZ + TO) > 30) {
1215 APInt SignedInt34 =
APInt(34, (
Imm >> (30 - LZ)) & 0x3ffffffff);
1220 getI32Imm(30 - LZ), getI32Imm(LZ));
1227 if ((LZ + FO + TO) > 30) {
1233 getI32Imm(TO), getI32Imm(LZ));
1247 if (isInt<34>(RotImm)) {
1278 unsigned *InstCnt =
nullptr) {
1279 unsigned InstCntDirect = 0;
1291 unsigned InstCntDirectP = 0;
1298 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1300 *InstCnt = InstCntDirectP;
1307 *InstCnt = InstCntDirect;
1310 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
1319 SDValue(Result, 0), getI32Imm(Hi16));
1328 *InstCnt = InstCntDirect;
1337 int64_t
Imm = cast<ConstantSDNode>(
N)->getZExtValue();
1349 class BitPermutationSelector {
1364 VariableKnownToBeZero
1368 : V(V), Idx(
I), K(K) {}
1369 ValueBit(
Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1372 return K == ConstZero || K == VariableKnownToBeZero;
1375 bool hasValue()
const {
1376 return K == Variable || K == VariableKnownToBeZero;
1380 assert(hasValue() &&
"Cannot get the value of a constant bit");
1384 unsigned getValueBitIndex()
const {
1385 assert(hasValue() &&
"Cannot get the value bit index of a constant bit");
1394 unsigned StartIdx, EndIdx;
1404 bool Repl32Coalesced;
1406 BitGroup(
SDValue V,
unsigned R,
unsigned S,
unsigned E)
1407 : V(V), RLAmt(
R), StartIdx(
S), EndIdx(
E), Repl32(
false), Repl32CR(
false),
1408 Repl32Coalesced(
false) {
1410 <<
" [" <<
S <<
", " <<
E <<
"]\n");
1416 struct ValueRotInfo {
1419 unsigned NumGroups = 0;
1421 bool Repl32 =
false;
1423 ValueRotInfo() =
default;
1427 bool operator < (
const ValueRotInfo &Other)
const {
1431 if (Repl32 <
Other.Repl32)
1433 else if (Repl32 >
Other.Repl32)
1435 else if (NumGroups >
Other.NumGroups)
1437 else if (NumGroups <
Other.NumGroups)
1439 else if (RLAmt == 0 &&
Other.RLAmt != 0)
1441 else if (RLAmt != 0 &&
Other.RLAmt == 0)
1443 else if (FirstGroupStartIdx <
Other.FirstGroupStartIdx)
1449 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1450 using ValueBitsMemoizer =
1452 ValueBitsMemoizer Memoizer;
1458 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(
SDValue V,
1460 auto &ValueEntry = Memoizer[V];
1462 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1463 ValueEntry.reset(
new ValueBitsMemoizedValue());
1464 bool &Interesting = ValueEntry->first;
1466 Bits.resize(NumBits);
1474 const auto &LHSBits = *getValueBits(V.
getOperand(0), NumBits).second;
1476 for (
unsigned i = 0;
i < NumBits; ++
i)
1477 Bits[
i] = LHSBits[
i < RotAmt ?
i + (NumBits - RotAmt) :
i - RotAmt];
1479 return std::make_pair(Interesting =
true, &
Bits);
1487 const auto &LHSBits = *getValueBits(V.
getOperand(0), NumBits).second;
1489 for (
unsigned i = ShiftAmt;
i < NumBits; ++
i)
1490 Bits[
i] = LHSBits[
i - ShiftAmt];
1492 for (
unsigned i = 0;
i < ShiftAmt; ++
i)
1493 Bits[
i] = ValueBit(ValueBit::ConstZero);
1495 return std::make_pair(Interesting =
true, &
Bits);
1503 const auto &LHSBits = *getValueBits(V.
getOperand(0), NumBits).second;
1505 for (
unsigned i = 0;
i < NumBits - ShiftAmt; ++
i)
1506 Bits[
i] = LHSBits[
i + ShiftAmt];
1508 for (
unsigned i = NumBits - ShiftAmt;
i < NumBits; ++
i)
1509 Bits[
i] = ValueBit(ValueBit::ConstZero);
1511 return std::make_pair(Interesting =
true, &
Bits);
1523 std::tie(Interesting, LHSBits) = getValueBits(V.
getOperand(0), NumBits);
1525 for (
unsigned i = 0;
i < NumBits; ++
i)
1526 if (((
Mask >>
i) & 1) == 1)
1534 Bits[
i] = ValueBit(ValueBit::ConstZero);
1537 return std::make_pair(Interesting, &
Bits);
1541 const auto &LHSBits = *getValueBits(V.
getOperand(0), NumBits).second;
1542 const auto &RHSBits = *getValueBits(V.
getOperand(1), NumBits).second;
1544 bool AllDisjoint =
true;
1546 unsigned LastIdx = 0;
1547 for (
unsigned i = 0;
i < NumBits; ++
i) {
1555 if (LHSBits[
i].hasValue() && LHSBits[
i].getValue() == LastVal &&
1556 LHSBits[
i].getValueBitIndex() == LastIdx + 1)
1558 else if (RHSBits[
i].hasValue() && RHSBits[
i].getValue() == LastVal &&
1559 RHSBits[
i].getValueBitIndex() == LastIdx + 1)
1562 Bits[
i] = ValueBit(ValueBit::ConstZero);
1569 AllDisjoint =
false;
1573 if (
Bits[
i].hasValue()) {
1574 LastVal =
Bits[
i].getValue();
1575 LastIdx =
Bits[
i].getValueBitIndex();
1578 if (LastVal) LastVal =
SDValue();
1586 return std::make_pair(Interesting =
true, &
Bits);
1595 const unsigned NumOperandBits = 32;
1596 std::tie(Interesting, LHSBits) = getValueBits(V.
getOperand(0),
1599 for (
unsigned i = 0;
i < NumOperandBits; ++
i)
1602 for (
unsigned i = NumOperandBits;
i < NumBits; ++
i)
1603 Bits[
i] = ValueBit(ValueBit::ConstZero);
1605 return std::make_pair(Interesting, &
Bits);
1613 const unsigned NumAllBits =
FromType.getSizeInBits();
1615 std::tie(Interesting, InBits) = getValueBits(V.
getOperand(0),
1621 bool UseUpper32bit =
false;
1622 for (
unsigned i = 0;
i < NumValidBits; ++
i)
1623 if ((*InBits)[
i].hasValue() && (*InBits)[
i].getValueBitIndex() >= 32) {
1624 UseUpper32bit =
true;
1630 for (
unsigned i = 0;
i < NumValidBits; ++
i)
1633 return std::make_pair(Interesting, &
Bits);
1639 std::tie(Interesting, LHSBits) = getValueBits(V.
getOperand(0),
1643 const unsigned NumValidBits =
FromType.getSizeInBits();
1644 for (
unsigned i = 0;
i < NumValidBits; ++
i)
1649 for (
unsigned i = NumValidBits;
i < NumBits; ++
i)
1650 Bits[
i] = (*LHSBits)[
i].hasValue()
1651 ? ValueBit((*LHSBits)[
i].getValue(),
1652 (*LHSBits)[
i].getValueBitIndex(),
1653 ValueBit::VariableKnownToBeZero)
1654 : ValueBit(ValueBit::ConstZero);
1656 return std::make_pair(Interesting, &
Bits);
1661 EVT VT =
LD->getMemoryVT();
1664 for (
unsigned i = 0;
i < NumValidBits; ++
i)
1665 Bits[
i] = ValueBit(V,
i);
1668 for (
unsigned i = NumValidBits;
i < NumBits; ++
i)
1669 Bits[
i] = ValueBit(V,
i, ValueBit::VariableKnownToBeZero);
1673 return std::make_pair(Interesting =
false, &
Bits);
1678 for (
unsigned i = 0;
i < NumBits; ++
i)
1679 Bits[
i] = ValueBit(V,
i);
1681 return std::make_pair(Interesting =
false, &
Bits);
1686 void computeRotationAmounts() {
1688 RLAmt.resize(
Bits.size());
1689 for (
unsigned i = 0;
i <
Bits.size(); ++
i)
1690 if (
Bits[
i].hasValue()) {
1691 unsigned VBI =
Bits[
i].getValueBitIndex();
1695 RLAmt[
i] =
Bits.size() - (VBI -
i);
1698 RLAmt[
i] = UINT32_MAX;
1707 void collectBitGroups(
bool LateMask) {
1710 unsigned LastRLAmt = RLAmt[0];
1712 unsigned LastGroupStartIdx = 0;
1713 bool IsGroupOfZeros = !
Bits[LastGroupStartIdx].hasValue();
1714 for (
unsigned i = 1;
i <
Bits.size(); ++
i) {
1715 unsigned ThisRLAmt = RLAmt[
i];
1717 if (LateMask && !ThisValue) {
1718 ThisValue = LastValue;
1719 ThisRLAmt = LastRLAmt;
1722 if (BitGroups.empty())
1723 LastGroupStartIdx = 0;
1735 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1738 if (!(IsGroupOfZeros && ThisValue && !
Bits[
i].
isZero()))
1742 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1744 LastRLAmt = ThisRLAmt;
1745 LastValue = ThisValue;
1746 LastGroupStartIdx =
i;
1747 IsGroupOfZeros = !
Bits[LastGroupStartIdx].hasValue();
1750 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1753 if (BitGroups.empty())
1757 if (BitGroups.size() > 1) {
1761 if (BitGroups[0].StartIdx == 0 &&
1762 BitGroups[BitGroups.size()-1].EndIdx ==
Bits.size()-1 &&
1763 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1764 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1765 LLVM_DEBUG(
dbgs() <<
"\tcombining final bit group with initial one\n");
1766 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1767 BitGroups.erase(BitGroups.begin());
1777 void collectValueRotInfo() {
1780 for (
auto &BG : BitGroups) {
1781 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1782 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1784 VRI.RLAmt = BG.RLAmt;
1785 VRI.Repl32 = BG.Repl32;
1787 VRI.FirstGroupStartIdx =
std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1792 ValueRotsVec.clear();
1793 for (
auto &
I : ValueRots) {
1794 ValueRotsVec.push_back(
I.second);
1807 void assignRepl32BitGroups() {
1818 auto IsAllLow32 = [
this](BitGroup & BG) {
1819 if (BG.StartIdx <= BG.EndIdx) {
1820 for (
unsigned i = BG.StartIdx;
i <= BG.EndIdx; ++
i) {
1821 if (!
Bits[
i].hasValue())
1823 if (
Bits[
i].getValueBitIndex() >= 32)
1827 for (
unsigned i = BG.StartIdx;
i <
Bits.size(); ++
i) {
1828 if (!
Bits[
i].hasValue())
1830 if (
Bits[
i].getValueBitIndex() >= 32)
1833 for (
unsigned i = 0;
i <= BG.EndIdx; ++
i) {
1834 if (!
Bits[
i].hasValue())
1836 if (
Bits[
i].getValueBitIndex() >= 32)
1844 for (
auto &BG : BitGroups) {
1848 if (BG.RLAmt == 0) {
1849 auto PotentiallyMerged = [
this](BitGroup & BG) {
1850 for (
auto &BG2 : BitGroups)
1851 if (&BG != &BG2 && BG.V == BG2.V &&
1852 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
1856 if (!PotentiallyMerged(BG))
1859 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1860 if (IsAllLow32(BG)) {
1861 if (BG.RLAmt >= 32) {
1869 << BG.V.getNode() <<
" RLAmt = " << BG.RLAmt <<
" ["
1870 << BG.StartIdx <<
", " << BG.EndIdx <<
"]\n");
1876 for (
auto I = BitGroups.begin();
I != BitGroups.end();) {
1879 auto IP = (
I == BitGroups.begin()) ?
1880 std::prev(BitGroups.end()) : std::prev(
I);
1881 if (
I->Repl32 &&
IP->Repl32 &&
I->V ==
IP->V &&
I->RLAmt ==
IP->RLAmt &&
1882 I->StartIdx == (
IP->EndIdx + 1) % 64 &&
I !=
IP) {
1884 LLVM_DEBUG(
dbgs() <<
"\tcombining 32-bit replicated bit group for "
1885 <<
I->V.getNode() <<
" RLAmt = " <<
I->RLAmt <<
" ["
1886 <<
I->StartIdx <<
", " <<
I->EndIdx
1887 <<
"] with group with range [" <<
IP->StartIdx <<
", "
1888 <<
IP->EndIdx <<
"]\n");
1890 IP->EndIdx =
I->EndIdx;
1891 IP->Repl32CR =
IP->Repl32CR ||
I->Repl32CR;
1892 IP->Repl32Coalesced =
true;
1893 I = BitGroups.erase(
I);
1902 if (
I->StartIdx == 32 &&
I->EndIdx == 63) {
1903 assert(std::next(
I) == BitGroups.end() &&
1904 "bit group ends at index 63 but there is another?");
1905 auto IN = BitGroups.begin();
1907 if (
IP->Repl32 && IN->Repl32 &&
I->V ==
IP->V &&
I->V == IN->V &&
1908 (
I->RLAmt % 32) ==
IP->RLAmt && (
I->RLAmt % 32) == IN->RLAmt &&
1909 IP->EndIdx == 31 && IN->StartIdx == 0 &&
I !=
IP &&
1913 <<
" RLAmt = " <<
I->RLAmt <<
" [" <<
I->StartIdx
1914 <<
", " <<
I->EndIdx
1915 <<
"] with 32-bit replicated groups with ranges ["
1916 <<
IP->StartIdx <<
", " <<
IP->EndIdx <<
"] and ["
1917 << IN->StartIdx <<
", " << IN->EndIdx <<
"]\n");
1925 IP->Repl32CR =
IP->Repl32CR ||
I->RLAmt >= 32;
1926 IP->Repl32Coalesced =
true;
1927 I = BitGroups.erase(
I);
1932 IP->EndIdx = IN->EndIdx;
1933 IP->Repl32CR =
IP->Repl32CR || IN->Repl32CR ||
I->RLAmt >= 32;
1934 IP->Repl32Coalesced =
true;
1935 I = BitGroups.erase(
I);
1936 BitGroups.erase(BitGroups.begin());
1951 return CurDAG->getTargetConstant(
Imm, dl,
MVT::i32);
1956 for (
unsigned i = 0;
i <
Bits.size(); ++
i) {
1957 if (
Bits[
i].hasValue())
1959 Mask |= (UINT64_C(1) <<
i);
1974 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl,
MVT::i32);
1975 SDValue ImDef =
SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1977 SDValue ExtVal =
SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1988 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl,
MVT::i32);
1989 SDValue SubVal =
SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1997 void SelectAndParts32(
const SDLoc &dl,
SDValue &Res,
unsigned *InstCnt) {
2001 for (ValueRotInfo &VRI : ValueRotsVec) {
2003 for (
unsigned i = 0;
i <
Bits.size(); ++
i) {
2004 if (!
Bits[
i].hasValue() ||
Bits[
i].getValue() != VRI.V)
2006 if (RLAmt[
i] != VRI.RLAmt)
2012 unsigned ANDIMask = (
Mask & UINT16_MAX), ANDISMask =
Mask >> 16;
2013 assert((ANDIMask != 0 || ANDISMask != 0) &&
2014 "No set bits in mask for value bit groups");
2015 bool NeedsRotate = VRI.RLAmt != 0;
2031 unsigned NumAndInsts = (unsigned) NeedsRotate +
2032 (
unsigned) (ANDIMask != 0) +
2033 (
unsigned) (ANDISMask != 0) +
2034 (
unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2035 (
unsigned) (bool) Res;
2037 LLVM_DEBUG(
dbgs() <<
"\t\trotation groups for " << VRI.V.getNode()
2038 <<
" RL: " << VRI.RLAmt <<
":"
2039 <<
"\n\t\t\tisel using masking: " << NumAndInsts
2040 <<
" using rotates: " << VRI.NumGroups <<
"\n");
2042 if (NumAndInsts >= VRI.NumGroups)
2047 if (InstCnt) *InstCnt += NumAndInsts;
2052 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2053 getI32Imm(0, dl), getI32Imm(31, dl) };
2057 VRot = TruncateToInt32(VRI.V, dl);
2063 VRot, getI32Imm(ANDIMask, dl)),
2068 getI32Imm(ANDISMask, dl)),
2073 TotalVal = ANDISVal;
2078 ANDIVal, ANDISVal), 0);
2088 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2089 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2095 SDNode *Select32(
SDNode *
N,
bool LateMask,
unsigned *InstCnt) {
2099 if (InstCnt) *InstCnt = 0;
2102 SelectAndParts32(dl, Res, InstCnt);
2107 if ((!NeedMask || LateMask) && !Res) {
2108 ValueRotInfo &VRI = ValueRotsVec[0];
2110 if (InstCnt) *InstCnt += 1;
2112 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2113 getI32Imm(0, dl), getI32Imm(31, dl) };
2117 Res = TruncateToInt32(VRI.V, dl);
2121 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2122 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2126 if (InstCnt) *InstCnt += BitGroups.size();
2129 for (
auto &BG : BitGroups) {
2132 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2133 getI32Imm(
Bits.size() - BG.EndIdx - 1, dl),
2134 getI32Imm(
Bits.size() - BG.StartIdx - 1, dl) };
2135 Res =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl,
MVT::i32, Ops), 0);
2138 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2139 getI32Imm(
Bits.size() - BG.EndIdx - 1, dl),
2140 getI32Imm(
Bits.size() - BG.StartIdx - 1, dl) };
2141 Res =
SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl,
MVT::i32, Ops), 0);
2146 unsigned Mask = (unsigned) getZerosMask();
2148 unsigned ANDIMask = (
Mask & UINT16_MAX), ANDISMask =
Mask >> 16;
2149 assert((ANDIMask != 0 || ANDISMask != 0) &&
2150 "No set bits in zeros mask?");
2152 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2153 (unsigned) (ANDISMask != 0) +
2154 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2159 Res, getI32Imm(ANDIMask, dl)),
2164 getI32Imm(ANDISMask, dl)),
2173 ANDIVal, ANDISVal), 0);
2179 unsigned SelectRotMask64Count(
unsigned RLAmt,
bool Repl32,
2180 unsigned MaskStart,
unsigned MaskEnd,
2184 unsigned InstMaskStart = 64 - MaskEnd - 1,
2185 InstMaskEnd = 64 - MaskStart - 1;
2190 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2191 InstMaskEnd == 63 - RLAmt)
2200 bool Repl32,
unsigned MaskStart,
unsigned MaskEnd,
2201 unsigned *InstCnt =
nullptr) {
2204 unsigned InstMaskStart = 64 - MaskEnd - 1,
2205 InstMaskEnd = 64 - MaskStart - 1;
2207 if (InstCnt) *InstCnt += 1;
2213 assert(InstMaskStart >= 32 &&
"Mask cannot start out of range");
2214 assert(InstMaskEnd >= 32 &&
"Mask cannot end out of range");
2216 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2217 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2222 if (InstMaskEnd == 63) {
2224 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2225 getI32Imm(InstMaskStart, dl) };
2226 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl,
MVT::i64, Ops), 0);
2229 if (InstMaskStart == 0) {
2231 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2232 getI32Imm(InstMaskEnd, dl) };
2233 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl,
MVT::i64, Ops), 0);
2236 if (InstMaskEnd == 63 - RLAmt) {
2238 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2239 getI32Imm(InstMaskStart, dl) };
2240 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl,
MVT::i64, Ops), 0);
2249 if (InstCnt) *InstCnt += 1;
2252 unsigned RLAmt2 = MaskStart;
2255 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2257 V = SelectRotMask64(V, dl, RLAmt1,
false, 0, 63);
2258 return SelectRotMask64(V, dl, RLAmt2,
false, MaskStart, MaskEnd);
2264 unsigned RLAmt,
bool Repl32,
unsigned MaskStart,
2265 unsigned MaskEnd,
unsigned *InstCnt =
nullptr) {
2268 unsigned InstMaskStart = 64 - MaskEnd - 1,
2269 InstMaskEnd = 64 - MaskStart - 1;
2271 if (InstCnt) *InstCnt += 1;
2277 assert(InstMaskStart >= 32 &&
"Mask cannot start out of range");
2278 assert(InstMaskEnd >= 32 &&
"Mask cannot end out of range");
2280 { ExtendToInt64(
Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2281 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2286 if (InstMaskEnd == 63 - RLAmt) {
2288 { ExtendToInt64(
Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2289 getI32Imm(InstMaskStart, dl) };
2290 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl,
MVT::i64, Ops), 0);
2299 if (InstCnt) *InstCnt += 1;
2302 unsigned RLAmt2 = MaskStart;
2305 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2307 V = SelectRotMask64(V, dl, RLAmt1,
false, 0, 63);
2308 return SelectRotMaskIns64(
Base, V, dl, RLAmt2,
false, MaskStart, MaskEnd);
2311 void SelectAndParts64(
const SDLoc &dl,
SDValue &Res,
unsigned *InstCnt) {
2324 for (ValueRotInfo &VRI : ValueRotsVec) {
2332 auto MatchingBG = [VRI](
const BitGroup &BG) {
2336 unsigned EffRLAmt = BG.RLAmt;
2337 if (!VRI.Repl32 && BG.Repl32) {
2338 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2339 !BG.Repl32Coalesced) {
2345 }
else if (VRI.Repl32 != BG.Repl32) {
2349 return VRI.RLAmt == EffRLAmt;
2352 for (
auto &BG : BitGroups) {
2353 if (!MatchingBG(BG))
2356 if (BG.StartIdx <= BG.EndIdx) {
2357 for (
unsigned i = BG.StartIdx;
i <= BG.EndIdx; ++
i)
2358 Mask |= (UINT64_C(1) <<
i);
2360 for (
unsigned i = BG.StartIdx;
i <
Bits.size(); ++
i)
2361 Mask |= (UINT64_C(1) <<
i);
2362 for (
unsigned i = 0;
i <= BG.EndIdx; ++
i)
2363 Mask |= (UINT64_C(1) <<
i);
2372 unsigned ANDIMask = (
Mask & UINT16_MAX),
2373 ANDISMask = (
Mask >> 16) & UINT16_MAX;
2375 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !
isUInt<32>(
Mask));
2377 unsigned NumAndInsts = (unsigned) NeedsRotate +
2378 (
unsigned) (bool) Res;
2379 unsigned NumOfSelectInsts = 0;
2381 assert(NumOfSelectInsts > 0 &&
"Failed to select an i64 constant.");
2383 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2384 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2386 NumAndInsts += NumOfSelectInsts + 1;
2388 unsigned NumRLInsts = 0;
2389 bool FirstBG =
true;
2390 bool MoreBG =
false;
2391 for (
auto &BG : BitGroups) {
2392 if (!MatchingBG(BG)) {
2397 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2402 LLVM_DEBUG(
dbgs() <<
"\t\trotation groups for " << VRI.V.getNode()
2403 <<
" RL: " << VRI.RLAmt << (VRI.Repl32 ?
" (32):" :
":")
2404 <<
"\n\t\t\tisel using masking: " << NumAndInsts
2405 <<
" using rotates: " << NumRLInsts <<
"\n");
2411 if (NumAndInsts > NumRLInsts)
2416 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2421 if (InstCnt) *InstCnt += NumAndInsts;
2429 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2430 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2435 if (Use32BitInsts) {
2436 assert((ANDIMask != 0 || ANDISMask != 0) &&
2437 "No set bits in mask when using 32-bit ands for 64-bit value");
2441 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl,
MVT::i64,
2442 ExtendToInt64(VRot, dl),
2443 getI32Imm(ANDIMask, dl)),
2448 ExtendToInt64(VRot, dl),
2449 getI32Imm(ANDISMask, dl)),
2453 TotalVal = ANDISVal;
2458 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2463 ExtendToInt64(VRot, dl), TotalVal),
2471 ExtendToInt64(Res, dl), TotalVal),
2476 eraseMatchingBitGroups(MatchingBG);
2481 SDNode *Select64(
SDNode *
N,
bool LateMask,
unsigned *InstCnt) {
2485 if (InstCnt) *InstCnt = 0;
2488 SelectAndParts64(dl, Res, InstCnt);
2493 if ((!NeedMask || LateMask) && !Res) {
2497 unsigned MaxGroupsIdx = 0;
2498 if (!ValueRotsVec[0].Repl32) {
2499 for (
unsigned i = 0, ie = ValueRotsVec.size();
i < ie; ++
i)
2500 if (ValueRotsVec[
i].Repl32) {
2501 if (ValueRotsVec[
i].NumGroups > ValueRotsVec[0].NumGroups)
2507 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2508 bool NeedsRotate =
false;
2511 }
else if (VRI.Repl32) {
2512 for (
auto &BG : BitGroups) {
2513 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2514 BG.Repl32 != VRI.Repl32)
2519 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2528 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2529 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2536 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2537 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2538 BG.Repl32 == VRI.Repl32;
2545 for (
auto I = BitGroups.begin(),
IE = BitGroups.end();
I !=
IE; ++
I) {
2546 if (SelectRotMask64Count(
I->RLAmt,
I->Repl32,
I->StartIdx,
I->EndIdx,
2548 SelectRotMask64Count(
I->RLAmt,
I->Repl32,
I->StartIdx,
I->EndIdx,
2550 if (
I != BitGroups.begin()) {
2553 BitGroups.insert(BitGroups.begin(), BG);
2561 for (
auto &BG : BitGroups) {
2563 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2564 BG.EndIdx, InstCnt);
2566 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2567 BG.StartIdx, BG.EndIdx, InstCnt);
2578 unsigned ANDIMask = (
Mask & UINT16_MAX),
2579 ANDISMask = (
Mask >> 16) & UINT16_MAX;
2581 if (Use32BitInsts) {
2582 assert((ANDIMask != 0 || ANDISMask != 0) &&
2583 "No set bits in mask when using 32-bit ands for 64-bit value");
2585 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2586 (unsigned) (ANDISMask != 0) +
2587 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2591 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl,
MVT::i64,
2592 ExtendToInt64(Res, dl),
2593 getI32Imm(ANDIMask, dl)),
2598 ExtendToInt64(Res, dl),
2599 getI32Imm(ANDISMask, dl)),
2608 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2610 unsigned NumOfSelectInsts = 0;
2614 ExtendToInt64(Res, dl), MaskVal),
2617 *InstCnt += NumOfSelectInsts + 1;
2626 collectBitGroups(LateMask);
2627 if (BitGroups.empty())
2631 if (
Bits.size() == 64)
2632 assignRepl32BitGroups();
2635 collectValueRotInfo();
2637 if (
Bits.size() == 32) {
2638 return Select32(
N, LateMask, InstCnt);
2640 assert(
Bits.size() == 64 &&
"Not 64 bits here?");
2641 return Select64(
N, LateMask, InstCnt);
2647 void eraseMatchingBitGroups(
function_ref<
bool(
const BitGroup &)>
F) {
2653 bool NeedMask =
false;
2673 getValueBits(
SDValue(
N, 0),
N->getValueType(0).getSizeInBits());
2678 LLVM_DEBUG(
dbgs() <<
"Considering bit-permutation-based instruction"
2679 " selection for: ");
2683 computeRotationAmounts();
2696 unsigned InstCnt = 0, InstCntLateMask = 0;
2699 LLVM_DEBUG(
dbgs() <<
"\t\tisel would use " << InstCnt <<
" instructions\n");
2704 <<
" instructions\n");
2706 if (InstCnt <= InstCntLateMask) {
2716 class IntegerCompareEliminator {
// Direction of a conversion inserted around a GPR compare sequence
// (see addExtOrTrunc): Ext widens a natural-width result to 64 bits,
// Trunc narrows a 64-bit result back to 32 bits. The assert in
// addExtOrTrunc ("Unknown convertion between 32 and 64 bit values.")
// shows these are the only two supported conversions.
2721 enum ExtOrTruncConversion {
Ext, Trunc };
// Requested form of a setcc lowered into a GPR (see getSETCCInGPR):
// ZExt*/SExt* select whether the i1 result is zero- or sign-extended,
// and the *Invert variants additionally invert the condition code
// (getSETCCInGPR flips CC when ConvOpts is ZExtInvert or SExtInvert).
2729 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
// Kind of comparison-against-zero produced by
// getCompoundZeroComparisonInGPR: GE* / LE* pick the relation and
// ZExt / SExt pick whether the i1 result is zero- or sign-extended.
2739 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2746 SDValue addExtOrTrunc(
SDValue NatWidthRes, ExtOrTruncConversion Conv);
2750 int64_t RHSValue,
SDLoc dl);
2752 int64_t RHSValue,
SDLoc dl);
2754 int64_t RHSValue,
SDLoc dl);
2756 int64_t RHSValue,
SDLoc dl);
2761 PPCDAGToDAGISel *Sel) : CurDAG(DAG),
S(Sel) {
2764 "Only expecting to use this on 64 bit targets.");
2769 switch (
N->getOpcode()) {
2780 return tryEXTEND(
N);
2784 return tryLogicOpOfCompares(
N);
2790 static bool isLogicOp(
unsigned Opc) {
2798 "Expecting a zero/sign extend node!");
2802 if (isLogicOp(
N->getOperand(0).getOpcode()) &&
2803 N->getOperand(0).getValueType() ==
MVT::i1 &&
2805 WideRes = computeLogicOpInGPR(
N->getOperand(0));
2806 else if (
N->getOperand(0).getOpcode() !=
ISD::SETCC)
2810 getSETCCInGPR(
N->getOperand(0),
2812 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2819 bool Output32Bit =
N->getValueType(0) ==
MVT::i32;
2825 if (Input32Bit != Output32Bit)
2827 ExtOrTruncConversion::Trunc);
2835 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(
SDNode *
N) {
2838 assert(isLogicOp(
N->getOpcode()) &&
2839 "Expected a logic operation on setcc results.");
2841 if (!LoweredLogical)
2846 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2855 if (IsBitwiseNegate &&
2858 else if (IsBitwiseNegate)
2860 OpToConvToRecForm = LoweredLogical.
getOperand(0);
2864 OpToConvToRecForm = LoweredLogical;
2874 if (NewOpc != -1 && IsBitwiseNegate) {
2877 "Expected a PPC::XORI8 only for bitwise negation.");
2879 std::vector<SDValue> Ops;
2888 assert((NewOpc != -1 || !IsBitwiseNegate) &&
2889 "No record form available for AND8/OR8/XOR8?");
2917 SDValue IntegerCompareEliminator::computeLogicOpInGPR(
SDValue LogicOp) {
2919 "Can only handle logic operations here.");
2921 "Can only handle logic operations on i1 values here.");
2933 unsigned OperandOpcode = Operand.
getOpcode();
2935 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2940 PPC::RLDICL, dl, InVT, InputOp,
2941 S->getI64Imm(0, dl),
2942 S->getI64Imm(63, dl)), 0);
2943 }
else if (isLogicOp(OperandOpcode))
2944 return computeLogicOpInGPR(Operand);
2953 if (!
LHS || (!
RHS && !IsBitwiseNegation))
2956 NumLogicOpsOnComparison++;
2961 if (!IsBitwiseNegation &&
RHS.getValueType() ==
MVT::i32)
2967 case ISD::AND: NewOpc = PPC::AND8;
break;
2968 case ISD::OR: NewOpc = PPC::OR8;
break;
2969 case ISD::XOR: NewOpc = PPC::XOR8;
break;
2972 if (IsBitwiseNegation) {
2973 RHS =
S->getI64Imm(1, dl);
2974 NewOpc = PPC::XORI8;
2985 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(
SDValue Input) {
2987 "Can only sign-extend 32-bit values here.");
2988 unsigned Opc =
Input.getOpcode();
2997 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3009 SignExtensionsAdded++;
3018 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(
SDValue Input) {
3020 "Can only zero-extend 32-bit values here.");
3021 unsigned Opc =
Input.getOpcode();
3030 if (IsTruncateOfZExt)
3037 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3044 ZeroExtensionsAdded++;
3046 S->getI64Imm(0, dl),
3047 S->getI64Imm(32, dl)), 0);
3054 SDValue IntegerCompareEliminator::addExtOrTrunc(
SDValue NatWidthRes,
3055 ExtOrTruncConversion Conv) {
3056 SDLoc dl(NatWidthRes);
3065 ImDef, NatWidthRes, SubRegIdx), 0);
3068 assert(Conv == ExtOrTruncConversion::Trunc &&
3069 "Unknown convertion between 32 and 64 bit values.");
3075 NatWidthRes, SubRegIdx), 0);
3081 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(
SDValue LHS,
SDLoc dl,
3082 ZeroCompare CmpTy) {
3083 EVT InVT =
LHS.getValueType();
3089 case ZeroCompare::GEZExt:
3090 case ZeroCompare::GESExt:
3094 case ZeroCompare::LEZExt:
3095 case ZeroCompare::LESExt: {
3098 LHS = signExtendInputIfNeeded(
LHS);
3103 Neg,
S->getI64Imm(1, dl),
3104 S->getI64Imm(63, dl)), 0);
3108 S->getI64Imm(~0ULL, dl)), 0);
3118 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3120 ToExtend,
S->getI64Imm(1, dl),
3121 S->getI64Imm(63, dl)), 0);
3123 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3125 S->getI64Imm(63, dl)), 0);
3127 assert(Is32Bit &&
"Should have handled the 32-bit sequences above.");
3130 case ZeroCompare::GEZExt: {
3131 SDValue ShiftOps[] = { ToExtend,
S->getI32Imm(1, dl),
S->getI32Imm(31, dl),
3132 S->getI32Imm(31, dl) };
3136 case ZeroCompare::GESExt:
3138 S->getI32Imm(31, dl)), 0);
3139 case ZeroCompare::LEZExt:
3141 S->getI32Imm(1, dl)), 0);
3142 case ZeroCompare::LESExt:
3144 S->getI32Imm(-1, dl)), 0);
3157 int64_t RHSValue,
SDLoc dl) {
3161 bool IsRHSZero = RHSValue == 0;
3162 bool IsRHSOne = RHSValue == 1;
3163 bool IsRHSNegOne = RHSValue == -1LL;
3173 SDValue ShiftOps[] = { Clz,
S->getI32Imm(27, dl),
S->getI32Imm(5, dl),
3174 S->getI32Imm(31, dl) };
3185 SDValue ShiftOps[] = { Clz,
S->getI32Imm(27, dl),
S->getI32Imm(5, dl),
3186 S->getI32Imm(31, dl) };
3190 S->getI32Imm(1, dl)), 0);
3196 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GEZExt);
3202 IsRHSZero = RHSConst && RHSConst->
isZero();
3213 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LEZExt);
3217 LHS = signExtendInputIfNeeded(
LHS);
3218 RHS = signExtendInputIfNeeded(
RHS);
3223 S->getI64Imm(1, dl),
S->getI64Imm(63, dl)),
3235 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GEZExt);
3241 LHS = signExtendInputIfNeeded(
LHS);
3242 RHS = signExtendInputIfNeeded(
RHS);
3246 Neg,
S->getI32Imm(1, dl),
S->getI32Imm(63, dl)), 0);
3252 IsRHSZero = RHSConst && RHSConst->
isZero();
3264 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LEZExt);
3268 SDValue ShiftOps[] = {
LHS,
S->getI32Imm(1, dl),
S->getI32Imm(31, dl),
3269 S->getI32Imm(31, dl) };
3277 LHS = signExtendInputIfNeeded(
LHS);
3278 RHS = signExtendInputIfNeeded(
RHS);
3282 SUBFNode,
S->getI64Imm(1, dl),
3283 S->getI64Imm(63, dl)), 0);
3294 LHS = zeroExtendInputIfNeeded(
LHS);
3295 RHS = zeroExtendInputIfNeeded(
RHS);
3300 Subtract,
S->getI64Imm(1, dl),
3301 S->getI64Imm(63, dl)), 0);
3303 S->getI32Imm(1, dl)), 0);
3314 LHS = zeroExtendInputIfNeeded(
LHS);
3315 RHS = zeroExtendInputIfNeeded(
RHS);
3319 Subtract,
S->getI64Imm(1, dl),
3320 S->getI64Imm(63, dl)), 0);
3330 int64_t RHSValue,
SDLoc dl) {
3334 bool IsRHSZero = RHSValue == 0;
3335 bool IsRHSOne = RHSValue == 1;
3336 bool IsRHSNegOne = RHSValue == -1LL;
3349 SDValue SHLOps[] = { Cntlzw,
S->getI32Imm(27, dl),
3350 S->getI32Imm(5, dl),
S->getI32Imm(31, dl) };
3368 { Clz,
S->getI32Imm(27, dl),
S->getI32Imm(5, dl),
S->getI32Imm(31, dl) };
3373 S->getI32Imm(1, dl)), 0);
3380 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GESExt);
3386 IsRHSZero = RHSConst && RHSConst->
isZero();
3395 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LESExt);
3398 LHS = signExtendInputIfNeeded(
LHS);
3399 RHS = signExtendInputIfNeeded(
RHS);
3405 SUBFNode,
S->getI64Imm(1, dl),
3406 S->getI64Imm(63, dl)), 0);
3408 S->getI32Imm(-1, dl)), 0);
3415 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GESExt);
3420 LHS = signExtendInputIfNeeded(
LHS);
3421 RHS = signExtendInputIfNeeded(
RHS);
3425 S->getI64Imm(63, dl)), 0);
3431 IsRHSZero = RHSConst && RHSConst->
isZero();
3442 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LESExt);
3446 S->getI32Imm(31, dl)), 0);
3451 LHS = signExtendInputIfNeeded(
LHS);
3452 RHS = signExtendInputIfNeeded(
RHS);
3456 SUBFNode,
S->getI64Imm(63, dl)), 0);
3467 LHS = zeroExtendInputIfNeeded(
LHS);
3468 RHS = zeroExtendInputIfNeeded(
RHS);
3473 S->getI32Imm(1, dl),
S->getI32Imm(63,dl)),
3476 S->getI32Imm(-1, dl)), 0);
3487 LHS = zeroExtendInputIfNeeded(
LHS);
3488 RHS = zeroExtendInputIfNeeded(
RHS);
3492 Subtract,
S->getI64Imm(63, dl)), 0);
3502 int64_t RHSValue,
SDLoc dl) {
3506 bool IsRHSZero = RHSValue == 0;
3507 bool IsRHSOne = RHSValue == 1;
3508 bool IsRHSNegOne = RHSValue == -1LL;
3519 S->getI64Imm(58, dl),
3520 S->getI64Imm(63, dl)), 0);
3531 Xor,
S->getI32Imm(~0U, dl)), 0);
3541 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GEZExt);
3544 IsRHSZero = RHSConst && RHSConst->
isZero();
3553 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LEZExt);
3556 S->getI64Imm(1, dl),
3557 S->getI64Imm(63, dl)), 0);
3560 S->getI64Imm(63, dl)), 0);
3565 ShiftR, ShiftL, SubtractCarry), 0);
3573 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GEZExt);
3577 S->getI64Imm(~0ULL, dl)), 0);
3581 S->getI64Imm(1, dl),
3582 S->getI64Imm(63, dl)), 0);
3586 IsRHSZero = RHSConst && RHSConst->
isZero();
3596 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LEZExt);
3599 S->getI64Imm(1, dl),
3600 S->getI64Imm(63, dl)), 0);
3603 LHS,
S->getI64Imm(63, dl)), 0);
3606 RHS,
S->getI64Imm(1, dl),
3607 S->getI64Imm(63, dl)), 0);
3613 SRDINode, SRADINode, SUBFC8Carry), 0);
3615 ADDE8Node,
S->getI64Imm(1, dl)), 0);
3630 LHS,
LHS, SUBFC8Carry), 0);
3632 SUBFE8Node,
S->getI64Imm(1, dl)), 0);
3647 LHS,
LHS, SubtractCarry), 0);
3659 int64_t RHSValue,
SDLoc dl) {
3663 bool IsRHSZero = RHSValue == 0;
3664 bool IsRHSOne = RHSValue == 1;
3665 bool IsRHSNegOne = RHSValue == -1LL;
3677 AddInput,
S->getI32Imm(~0U, dl)), 0);
3690 Xor,
S->getI32Imm(0, dl)), 0);
3692 SC,
SC.getValue(1)), 0);
3700 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GESExt);
3703 IsRHSZero = RHSConst && RHSConst->
isZero();
3712 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LESExt);
3715 S->getI64Imm(63, dl)), 0);
3718 S->getI64Imm(1, dl),
3719 S->getI64Imm(63, dl)), 0);
3725 ShiftR, ShiftL, SubtractCarry), 0);
3734 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::GESExt);
3738 S->getI64Imm(-1, dl)), 0);
3742 S->getI64Imm(63, dl)), 0);
3746 IsRHSZero = RHSConst && RHSConst->
isZero();
3756 return getCompoundZeroComparisonInGPR(
LHS, dl, ZeroCompare::LESExt);
3759 S->getI64Imm(63, dl)), 0);
3763 LHS,
S->getI64Imm(63, dl)), 0);
3766 RHS,
S->getI64Imm(1, dl),
3767 S->getI64Imm(63, dl)), 0);
3773 SRDINode, SRADINode, SUBFC8Carry), 0);
3776 ADDE8Node,
S->getI64Imm(1, dl)), 0);
3793 LHS, SubtractCarry), 0);
3795 ExtSub, ExtSub), 0);
3820 "An ISD::SETCC node required here.");
3828 for (
auto CompareUse :
Compare.getNode()->uses())
3832 !isLogicOp(CompareUse->getOpcode())) {
3833 OmittedForNonExtendUses++;
3843 SetccInGPROpts ConvOpts) {
3846 "An ISD::SETCC node required here.");
3859 cast<CondCodeSDNode>(
Compare.getOperand(CCOpNum))->get();
3860 EVT InputVT =
LHS.getValueType();
3864 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3865 ConvOpts == SetccInGPROpts::SExtInvert)
3868 bool Inputs32Bit = InputVT ==
MVT::i32;
3873 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3874 ConvOpts == SetccInGPROpts::SExtInvert;
3876 if (IsSext && Inputs32Bit)
3877 return get32BitSExtCompare(
LHS,
RHS, CC, RHSValue, dl);
3878 else if (Inputs32Bit)
3879 return get32BitZExtCompare(
LHS,
RHS, CC, RHSValue, dl);
3881 return get64BitSExtCompare(
LHS,
RHS, CC, RHSValue, dl);
3882 return get64BitZExtCompare(
LHS,
RHS, CC, RHSValue, dl);
3887 bool PPCDAGToDAGISel::tryIntCompareInGPR(
SDNode *
N) {
3904 switch (
N->getOpcode()) {
3911 IntegerCompareEliminator ICmpElim(CurDAG,
this);
3912 if (
SDNode *New = ICmpElim.Select(
N)) {
3913 ReplaceNode(
N, New);
3921 bool PPCDAGToDAGISel::tryBitPermutation(
SDNode *
N) {
3929 switch (
N->getOpcode()) {
3936 BitPermutationSelector BPS(CurDAG);
3937 if (
SDNode *New = BPS.Select(
N)) {
3938 ReplaceNode(
N, New);
3962 getI32Imm(
Imm & 0xFFFF, dl)),
3967 getI32Imm(
Imm & 0xFFFF, dl)),
3980 getI32Imm(
Imm >> 16, dl)), 0);
3982 getI32Imm(
Imm & 0xFFFF, dl)), 0);
3988 getI32Imm(
Imm & 0xFFFF, dl)), 0);
3994 getI32Imm((
int)SImm & 0xFFFF,
4006 getI32Imm(
Imm & 0xFFFF, dl)),
4011 getI32Imm(
Imm & 0xFFFF, dl)),
4025 getI64Imm(
Imm >> 16, dl)), 0);
4027 getI64Imm(
Imm & 0xFFFF, dl)),
4035 getI64Imm(
Imm & 0xFFFF, dl)), 0);
4041 getI64Imm(SImm & 0xFFFF, dl)),
4046 if (Subtarget->
hasSPE()) {
4051 Opc = PPC::EFSCMPEQ;
4059 Opc = PPC::EFSCMPLT;
4067 Opc = PPC::EFSCMPGT;
4073 if (Subtarget->
hasSPE()) {
4078 Opc = PPC::EFDCMPEQ;
4086 Opc = PPC::EFDCMPLT;
4094 Opc = PPC::EFDCMPGT;
4098 Opc = Subtarget->
hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4102 Opc = PPC::XSCMPUQP;
4170 case ISD::SETO: Invert =
true;
return 3;
4187 bool HasVSX,
bool &Swap,
bool &Negate) {
4215 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4217 return PPC::XVCMPEQDP;
4222 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4224 return PPC::XVCMPGTDP;
4229 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4231 return PPC::XVCMPGEDP;
4259 return PPC::VCMPEQUB;
4261 return PPC::VCMPEQUH;
4263 return PPC::VCMPEQUW;
4265 return PPC::VCMPEQUD;
4267 return PPC::VCMPEQUQ;
4271 return PPC::VCMPGTSB;
4273 return PPC::VCMPGTSH;
4275 return PPC::VCMPGTSW;
4277 return PPC::VCMPGTSD;
4279 return PPC::VCMPGTSQ;
4283 return PPC::VCMPGTUB;
4285 return PPC::VCMPGTUH;
4287 return PPC::VCMPGTUW;
4289 return PPC::VCMPGTUD;
4291 return PPC::VCMPGTUQ;
4300 bool PPCDAGToDAGISel::trySETCC(
SDNode *
N) {
4303 bool IsStrict =
N->isStrictFPOpcode();
4305 cast<CondCodeSDNode>(
N->getOperand(IsStrict ? 3 : 2))->get();
4308 bool isPPC64 = (PtrVT ==
MVT::i64);
4324 SDValue Ops[] = {
Op, getI32Imm(27, dl), getI32Imm(5, dl),
4325 getI32Imm(31, dl) };
4333 Op, getI32Imm(~0U, dl)), 0);
4338 SDValue Ops[] = {
Op, getI32Imm(1, dl), getI32Imm(31, dl),
4339 getI32Imm(31, dl) };
4347 SDValue Ops[] = {
T, getI32Imm(1, dl), getI32Imm(31, dl),
4348 getI32Imm(31, dl) };
4353 }
else if (
Imm == ~0U) {
4360 Op, getI32Imm(1, dl)), 0);
4365 0),
Op.getValue(1));
4371 Op, getI32Imm(~0U, dl));
4378 getI32Imm(1, dl)), 0);
4381 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4382 getI32Imm(31, dl) };
4387 SDValue Ops[] = {
Op, getI32Imm(1, dl), getI32Imm(31, dl),
4388 getI32Imm(31, dl) };
4399 if (!IsStrict &&
LHS.getValueType().isVector()) {
4403 EVT VecVT =
LHS.getValueType();
4405 unsigned int VCmpInst =
4434 if (Subtarget->
hasSPE() &&
LHS.getValueType().isFloatingPoint()) {
4448 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4449 getI32Imm(31, dl), getI32Imm(31, dl) };
4464 bool PPCDAGToDAGISel::isOffsetMultipleOf(
SDNode *
N,
unsigned Val)
const {
4470 AddrOp =
N->getOperand(1);
4485 if ((SlotAlign % Val) != 0)
4500 void PPCDAGToDAGISel::transferMemOperands(
SDNode *
N,
SDNode *Result) {
4507 bool &NeedSwapOps,
bool &IsUnCmp) {
4513 SDValue TrueRes =
N->getOperand(2);
4514 SDValue FalseRes =
N->getOperand(3);
4516 if (!TrueConst || (
N->getSimpleValueType(0) !=
MVT::i64 &&
4526 if ((TrueResVal < -1 || TrueResVal > 1) ||
4555 cast<CondCodeSDNode>(SetOrSelCC.
getOperand(InnerIsSel ? 4 : 2))->get();
4560 dyn_cast<ConstantSDNode>(SetOrSelCC.
getOperand(2));
4562 dyn_cast<ConstantSDNode>(SetOrSelCC.
getOperand(3));
4563 if (!SelCCTrueConst || !SelCCFalseConst)
4568 if (SelCCTVal == -1 && SelCCFVal == 1) {
4570 }
else if (SelCCTVal != 1 || SelCCFVal != -1)
4580 bool InnerSwapped =
false;
4581 if (
LHS == InnerRHS &&
RHS == InnerLHS)
4582 InnerSwapped =
true;
4583 else if (
LHS != InnerLHS ||
RHS != InnerRHS)
4594 NeedSwapOps = (InnerCC ==
ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4611 NeedSwapOps = (TrueResVal == 1);
4630 NeedSwapOps = (TrueResVal == -1);
4639 LLVM_DEBUG(
dbgs() <<
"Found a node that can be lowered to a SETB: ");
4649 if (
N.getNumOperands() < 1 || !isa<ConstantSDNode>(
N.getOperand(0)) ||
4652 switch (
N.getConstantOperandVal(0)) {
4653 case Intrinsic::ppc_vsx_xvtdivdp:
4654 case Intrinsic::ppc_vsx_xvtdivsp:
4655 case Intrinsic::ppc_vsx_xvtsqrtdp:
4656 case Intrinsic::ppc_vsx_xvtsqrtsp:
4662 bool PPCDAGToDAGISel::tryFoldSWTestBRCC(
SDNode *
N) {
4674 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(1))->get();
4679 if (!isa<ConstantSDNode>(CmpRHS) ||
4680 cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
4721 bool PPCDAGToDAGISel::tryAsSingleRLWINM(
SDNode *
N) {
4729 unsigned SH, MB, ME;
4732 if (isRotateAndMask(Val.
getNode(),
Imm,
false, SH, MB, ME)) {
4734 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4743 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),