22 #include "llvm/Config/llvm-config.h"
27 #include "llvm/IR/IntrinsicsX86.h"
37 #define DEBUG_TYPE "x86-isel"
39 STATISTIC(NumLoadMoved,
"Number of loads moved below TokenFactor");
42 cl::desc(
"Enable setting constant bits to reduce size of mask immediates"),
46 "x86-promote-anyext-load",
cl::init(
true),
58 struct X86ISelAddressMode {
66 int Base_FrameIndex = 0;
75 const char *ES =
nullptr;
80 bool NegateIndex =
false;
82 X86ISelAddressMode() =
default;
84 bool hasSymbolicDisplacement()
const {
85 return GV !=
nullptr ||
CP !=
nullptr || ES !=
nullptr ||
86 MCSym !=
nullptr ||
JT != -1 || BlockAddr !=
nullptr;
89 bool hasBaseOrIndexReg()
const {
96 if (
BaseType != RegBase)
return false;
98 dyn_cast_or_null<RegisterSDNode>(Base_Reg.
getNode()))
99 return RegNode->getReg() == X86::RIP;
108 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
110 dbgs() <<
"X86ISelAddressMode " <<
this <<
'\n';
111 dbgs() <<
"Base_Reg ";
117 dbgs() <<
" Base.FrameIndex " << Base_FrameIndex <<
'\n';
118 dbgs() <<
" Scale " << Scale <<
'\n'
126 dbgs() <<
" Disp " << Disp <<
'\n'
168 bool IndirectTlsSegRefs;
173 OptForMinSize(
false), IndirectTlsSegRefs(
false) {}
176 return "X86 DAG->DAG Instruction Selection";
183 "indirect-tls-seg-refs");
188 "OptForMinSize implies OptForSize");
194 void emitFunctionEntryCode()
override;
198 void PreprocessISelDAG()
override;
199 void PostprocessISelDAG()
override;
202 #include "X86GenDAGISel.inc"
207 bool foldOffsetIntoAddress(
uint64_t Offset, X86ISelAddressMode &AM);
208 bool matchLoadInAddress(
LoadSDNode *
N, X86ISelAddressMode &AM,
209 bool AllowSegmentRegForX32 =
false);
210 bool matchWrapper(
SDValue N, X86ISelAddressMode &AM);
211 bool matchAddress(
SDValue N, X86ISelAddressMode &AM);
212 bool matchVectorAddress(
SDValue N, X86ISelAddressMode &AM);
213 bool matchAdd(
SDValue &
N, X86ISelAddressMode &AM,
unsigned Depth);
214 bool matchAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
216 bool matchVectorAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
218 bool matchAddressBase(
SDValue N, X86ISelAddressMode &AM);
247 return tryFoldLoad(
P,
P,
N,
Base, Scale,
Index, Disp, Segment);
255 bool isProfitableToFormMaskedOp(
SDNode *
N)
const;
258 bool SelectInlineAsmMemoryOperand(
const SDValue &
Op,
259 unsigned ConstraintID,
260 std::vector<SDValue> &OutOps)
override;
262 void emitSpecialCodeForMain();
264 inline void getAddressOperands(X86ISelAddressMode &AM,
const SDLoc &
DL,
268 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
269 Base = CurDAG->getTargetFrameIndex(
270 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
271 else if (AM.Base_Reg.getNode())
274 Base = CurDAG->getRegister(0, VT);
276 Scale = getI8Imm(AM.Scale,
DL);
279 if (AM.NegateIndex) {
280 unsigned NegOpc = VT ==
MVT::i64 ? X86::NEG64r : X86::NEG32r;
286 if (AM.IndexReg.getNode())
289 Index = CurDAG->getRegister(0, VT);
294 Disp = CurDAG->getTargetGlobalAddress(AM.GV,
SDLoc(),
298 Disp = CurDAG->getTargetConstantPool(AM.CP,
MVT::i32, AM.Alignment,
299 AM.Disp, AM.SymbolFlags);
301 assert(!AM.Disp &&
"Non-zero displacement is ignored with ES.");
302 Disp = CurDAG->getTargetExternalSymbol(AM.ES,
MVT::i32, AM.SymbolFlags);
303 }
else if (AM.MCSym) {
304 assert(!AM.Disp &&
"Non-zero displacement is ignored with MCSym.");
305 assert(AM.SymbolFlags == 0 &&
"oo");
306 Disp = CurDAG->getMCSymbol(AM.MCSym,
MVT::i32);
307 }
else if (AM.JT != -1) {
308 assert(!AM.Disp &&
"Non-zero displacement is ignored with JT.");
309 Disp = CurDAG->getTargetJumpTable(AM.JT,
MVT::i32, AM.SymbolFlags);
310 }
else if (AM.BlockAddr)
311 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr,
MVT::i32, AM.Disp,
314 Disp = CurDAG->getTargetConstant(AM.Disp,
DL,
MVT::i32);
316 if (AM.Segment.getNode())
317 Segment = AM.Segment;
319 Segment = CurDAG->getRegister(0,
MVT::i16);
328 bool shouldAvoidImmediateInstFormsForSize(
SDNode *
N)
const {
334 if (!CurDAG->shouldOptForSize())
344 if (
User->isMachineOpcode()) {
367 auto *
C = dyn_cast<ConstantSDNode>(
N);
388 (RegNode = dyn_cast_or_null<RegisterSDNode>(
391 (RegNode->
getReg() == X86::RSP))
400 return (UseCount > 1);
420 assert((VecWidth == 128 || VecWidth == 256) &&
"Unexpected vector width");
422 MVT VecVT =
N->getOperand(0).getSimpleValueType();
428 assert((VecWidth == 128 || VecWidth == 256) &&
"Unexpected vector width");
430 MVT VecVT =
N->getSimpleValueType(0);
434 SDValue getPermuteVINSERTCommutedImmediate(
SDNode *
N,
unsigned VecWidth,
436 assert(VecWidth == 128 &&
"Unexpected vector width");
438 MVT VecVT =
N->getSimpleValueType(0);
440 assert((InsertIdx == 0 || InsertIdx == 1) &&
"Bad insertf128 index");
443 return getI8Imm(InsertIdx ? 0x02 : 0x30,
DL);
448 MVT VT =
N->getSimpleValueType(0);
453 SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs,
None), 0);
456 CurDAG->getMachineNode(
457 TargetOpcode::SUBREG_TO_REG, dl,
MVT::i64,
458 CurDAG->getTargetConstant(0, dl,
MVT::i64), Zero,
459 CurDAG->getTargetConstant(X86::sub_32bit, dl,
MVT::i32)),
464 unsigned Opcode =
N->getOpcode();
466 "Unexpected opcode for SBB materialization");
467 unsigned FlagOpIndex = Opcode ==
X86ISD::SBB ? 2 : 1;
469 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
470 N->getOperand(FlagOpIndex),
SDValue());
474 unsigned Opc = VT ==
MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
476 VTs = CurDAG->getVTList(SBBVT,
MVT::i32);
478 CurDAG->getMachineNode(Opc, dl, VTs,
479 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
485 bool isUnneededShiftMask(
SDNode *
N,
unsigned Width)
const {
487 const APInt &Val = cast<ConstantSDNode>(
N->getOperand(1))->getAPIntValue();
492 APInt Mask = Val | CurDAG->computeKnownBits(
N->getOperand(0)).Zero;
493 return Mask.countTrailingOnes() >=
Width;
499 SDNode *getGlobalBaseReg();
510 return Subtarget->getInstrInfo();
519 bool ComplexPatternFuncMutatesDAG()
const override {
523 bool isSExtAbsoluteSymbolRef(
unsigned Width,
SDNode *
N)
const;
527 if (!
N->isNonTemporal())
530 unsigned StoreSize =
N->getMemoryVT().getStoreSize();
532 if (
N->getAlign().value() < StoreSize)
541 return Subtarget->hasSSE41();
543 return Subtarget->hasAVX2();
545 return Subtarget->hasAVX512();
549 bool foldLoadStoreIntoMemOperand(
SDNode *Node);
551 bool matchBitExtract(
SDNode *Node);
552 bool shrinkAndImmediate(
SDNode *
N);
553 bool isMaskZeroExtended(
SDNode *
N)
const;
554 bool tryShiftAmountMod(
SDNode *
N);
555 bool tryShrinkShlLogicImm(
SDNode *
N);
561 bool tryMatchBitSelect(
SDNode *
N);
563 MachineSDNode *emitPCMPISTR(
unsigned ROpc,
unsigned MOpc,
bool MayFoldLoad,
565 MachineSDNode *emitPCMPESTR(
unsigned ROpc,
unsigned MOpc,
bool MayFoldLoad,
569 bool tryOptimizeRem8Extend(
SDNode *
N);
571 bool onlyUsesZeroFlag(
SDValue Flags)
const;
572 bool hasNoSignFlagUses(
SDValue Flags)
const;
573 bool hasNoCarryFlagUses(
SDValue Flags)
const;
581 unsigned Opcode =
N->getOpcode();
588 EVT OpVT =
N->getOperand(0).getValueType();
592 OpVT =
N->getOperand(1).getValueType();
594 return Subtarget->hasVLX();
608 bool X86DAGToDAGISel::isMaskZeroExtended(
SDNode *
N)
const {
630 if (useNonTemporalLoad(cast<LoadSDNode>(
N)))
662 if (
Imm->getAPIntValue().isSignedIntN(8))
671 Imm->getAPIntValue().getBitWidth() == 64 &&
672 Imm->getAPIntValue().isIntN(32))
679 (
Imm->getAPIntValue() == UINT8_MAX ||
680 Imm->getAPIntValue() == UINT16_MAX ||
681 Imm->getAPIntValue() == UINT32_MAX))
687 (-
Imm->getAPIntValue()).isSignedIntN(8))
691 (-
Imm->getAPIntValue()).isSignedIntN(8) &&
692 hasNoCarryFlagUses(
SDValue(U, 1)))
729 auto *
C = dyn_cast<ConstantSDNode>(U0.
getOperand(0));
730 if (
C &&
C->getSExtValue() == -2)
735 auto *
C = dyn_cast<ConstantSDNode>(U1.
getOperand(0));
736 if (
C &&
C->getSExtValue() == -2)
771 bool X86DAGToDAGISel::isProfitableToFormMaskedOp(
SDNode *
N)
const {
774 "Unexpected opcode!");
779 return N->getOperand(1).hasOneUse();
789 Ops.push_back(
Load.getOperand(0));
792 "Unexpected chain operand");
795 Ops.push_back(
Load.getOperand(0));
801 Ops.push_back(NewChain);
806 Load.getOperand(1),
Load.getOperand(2));
810 Ops.
append(Call->op_begin() + 1, Call->op_end());
844 if (isa<MemSDNode>(Chain.
getNode()) &&
845 cast<MemSDNode>(Chain.
getNode())->writeMem())
851 Callee.getValue(1).hasOneUse())
859 if ((
Imm & 0x00FFFFFF) != 0x0F1EFA)
862 uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
863 0x65, 0x66, 0x67, 0xf0, 0xf2};
866 uint8_t Byte = (
Imm >>
i) & 0xFF;
877 void X86DAGToDAGISel::PreprocessISelDAG() {
878 bool MadeChange =
false;
880 E = CurDAG->allnodes_end();
I !=
E; ) {
899 MVT VT =
N->getSimpleValueType(0);
900 int64_t
Imm = cast<ConstantSDNode>(
N)->getSExtValue();
901 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
908 SDValue Complement = CurDAG->getConstant(~
Imm, dl, VT,
false,
true);
909 Complement = CurDAG->getNOT(dl, Complement, VT);
911 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Complement);
921 if (
N->getOpcode() ==
X86ISD::AND && !
N->hasAnyUseOfValue(1)) {
923 N->getOperand(0),
N->getOperand(1));
925 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
949 auto mayPreventLoadFold = [&]() {
951 N->getOpcode() ==
ISD::ADD && Subtarget->hasAVX() &&
952 !
N->getOperand(1).hasOneUse();
955 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
961 MVT VT =
N->getSimpleValueType(0);
965 AllOnes = CurDAG->getBitcast(VT, AllOnes);
969 CurDAG->getNode(NewOpcode,
DL, VT,
N->getOperand(0), AllOnes);
971 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
978 switch (
N->getOpcode()) {
980 MVT VT =
N->getSimpleValueType(0);
989 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
992 CurDAG->getIntPtrConstant(
Index, dl));
995 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1004 MVT VT =
N->getSimpleValueType(0);
1008 auto *MemNode = cast<MemSDNode>(
N);
1011 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1012 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1014 MemNode->getMemOperand());
1017 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1020 CurDAG->getIntPtrConstant(
Index, dl));
1024 CurDAG->ReplaceAllUsesWith(
N, To);
1034 if (
N->getOperand(0).getValueType().getVectorElementType() ==
MVT::i1)
1037 assert(Subtarget->hasSSE41() &&
"Expected SSE4.1 support!");
1040 N->getOperand(0),
N->getOperand(1),
N->getOperand(2));
1042 CurDAG->ReplaceAllUsesWith(
N, Blendv.
getNode());
1055 if (!
N->getSimpleValueType(0).isVector())
1059 switch (
N->getOpcode()) {
1069 if (
N->isStrictFPOpcode())
1072 {
N->getOperand(0),
N->getOperand(1)});
1075 CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1078 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1088 if (!
N->getValueType(0).isVector())
1092 switch (
N->getOpcode()) {
1098 SDValue Res = CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1099 N->getOperand(0),
N->getOperand(1));
1101 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1110 if (!
N->getValueType(0).isVector())
1114 if (
N->getOperand(0).getScalarValueSizeInBits() == 1) {
1116 "Unexpected opcode for mask vector!");
1124 SDValue Res = CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1127 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1147 switch (
N->getOpcode()) {
1163 bool IsStrict =
N->isStrictFPOpcode();
1168 {
N->getOperand(0),
N->getOperand(1),
1175 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1186 MVT VT =
N->getSimpleValueType(0);
1201 if (Subtarget->hasSSE2()) {
1206 switch (
N->getOpcode()) {
1213 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1216 Res = CurDAG->getNode(
N->getOpcode(), dl, VecVT, Op0, Op1);
1219 CurDAG->getIntPtrConstant(0, dl));
1221 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1231 !Subtarget->useIndirectThunkCalls() &&
1232 ((
N->getOpcode() ==
X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1234 (Subtarget->is64Bit() ||
1235 !getTargetMachine().isPositionIndependent())))) {
1274 switch (
N->getOpcode()) {
1279 MVT SrcVT =
N->getOperand(0).getSimpleValueType();
1280 MVT DstVT =
N->getSimpleValueType(0);
1292 if (SrcIsSSE && DstIsSSE)
1295 if (!SrcIsSSE && !DstIsSSE) {
1300 if (
N->getConstantOperandVal(1))
1308 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1309 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1317 CurDAG->getEntryNode(), dl,
N->getOperand(0), MemTmp, MPI, MemVT);
1319 MemTmp, MPI, MemVT);
1326 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Result);
1335 MVT SrcVT =
N->getOperand(1).getSimpleValueType();
1336 MVT DstVT =
N->getSimpleValueType(0);
1348 if (SrcIsSSE && DstIsSSE)
1351 if (!SrcIsSSE && !DstIsSSE) {
1356 if (
N->getConstantOperandVal(2))
1364 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1365 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1376 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), MemTmp};
1380 if (
N->getFlags().hasNoFPExcept()) {
1383 Store->setFlags(Flags);
1386 assert(SrcVT == MemVT &&
"Unexpected VT!");
1387 Store = CurDAG->getStore(
N->getOperand(0), dl,
N->getOperand(1), MemTmp,
1394 Result = CurDAG->getMemIntrinsicNode(
1397 if (
N->getFlags().hasNoFPExcept()) {
1403 assert(DstVT == MemVT &&
"Unexpected VT!");
1404 Result = CurDAG->getLoad(DstVT, dl,
Store, MemTmp, MPI);
1412 CurDAG->ReplaceAllUsesWith(
N,
Result.getNode());
1426 CurDAG->RemoveDeadNodes();
1430 bool X86DAGToDAGISel::tryOptimizeRem8Extend(
SDNode *
N) {
1431 unsigned Opc =
N->getMachineOpcode();
1432 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1433 Opc != X86::MOVSX64rr8)
1445 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1446 : X86::MOVSX32rr8_NOREX;
1451 if (Opc == X86::MOVSX64rr8) {
1456 ReplaceUses(
N, Extend);
1465 void X86DAGToDAGISel::PostprocessISelDAG() {
1472 bool MadeChange =
false;
1473 while (Position != CurDAG->allnodes_begin()) {
1476 if (
N->use_empty() || !
N->isMachineOpcode())
1479 if (tryOptimizeRem8Extend(
N)) {
1486 unsigned Opc =
N->getMachineOpcode();
1487 if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1488 Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1489 N->getOperand(0) ==
N->getOperand(1) &&
1490 N->isOnlyUserOf(
N->getOperand(0).getNode()) &&
1491 N->getOperand(0).isMachineOpcode()) {
1493 unsigned N0Opc =
And.getMachineOpcode();
1494 if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1495 N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1500 ReplaceUses(
N, Test);
1504 if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1505 N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1508 case X86::AND8rm: NewOpc = X86::TEST8mr;
break;
1509 case X86::AND16rm: NewOpc = X86::TEST16mr;
break;
1510 case X86::AND32rm: NewOpc = X86::TEST32mr;
break;
1511 case X86::AND64rm: NewOpc = X86::TEST64mr;
break;
1521 And.getOperand(6) };
1524 CurDAG->setNodeMemRefs(
1525 Test, cast<MachineSDNode>(
And.getNode())->memoperands());
1526 ReplaceUses(
N, Test);
1536 if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1537 Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1538 N->getOperand(0) ==
N->getOperand(1) &&
1539 N->isOnlyUserOf(
N->getOperand(0).getNode()) &&
1540 N->getOperand(0).isMachineOpcode() &&
1543 unsigned N0Opc =
And.getMachineOpcode();
1546 if (N0Opc == X86::KANDBrr ||
1547 (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1548 N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1552 case X86::KORTESTBrr: NewOpc = X86::KTESTBrr;
break;
1553 case X86::KORTESTWrr: NewOpc = X86::KTESTWrr;
break;
1554 case X86::KORTESTDrr: NewOpc = X86::KTESTDrr;
break;
1555 case X86::KORTESTQrr: NewOpc = X86::KTESTQrr;
break;
1561 ReplaceUses(
N, KTest);
1568 if (Opc != TargetOpcode::SUBREG_TO_REG)
1571 unsigned SubRegIdx =
N->getConstantOperandVal(2);
1572 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1583 case X86::VMOVAPDrr:
case X86::VMOVUPDrr:
1584 case X86::VMOVAPSrr:
case X86::VMOVUPSrr:
1585 case X86::VMOVDQArr:
case X86::VMOVDQUrr:
1586 case X86::VMOVAPDYrr:
case X86::VMOVUPDYrr:
1587 case X86::VMOVAPSYrr:
case X86::VMOVUPSYrr:
1588 case X86::VMOVDQAYrr:
case X86::VMOVDQUYrr:
1589 case X86::VMOVAPDZ128rr:
case X86::VMOVUPDZ128rr:
1590 case X86::VMOVAPSZ128rr:
case X86::VMOVUPSZ128rr:
1591 case X86::VMOVDQA32Z128rr:
case X86::VMOVDQU32Z128rr:
1592 case X86::VMOVDQA64Z128rr:
case X86::VMOVDQU64Z128rr:
1593 case X86::VMOVAPDZ256rr:
case X86::VMOVUPDZ256rr:
1594 case X86::VMOVAPSZ256rr:
case X86::VMOVUPSZ256rr:
1595 case X86::VMOVDQA32Z256rr:
case X86::VMOVDQU32Z256rr:
1596 case X86::VMOVDQA64Z256rr:
case X86::VMOVDQU64Z256rr:
1601 if (!
In.isMachineOpcode() ||
1602 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1607 uint64_t TSFlags = getInstrInfo()->get(
In.getMachineOpcode()).TSFlags;
1615 CurDAG->UpdateNodeOperands(
N,
N->getOperand(0),
In,
N->getOperand(2));
1620 CurDAG->RemoveDeadNodes();
1625 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1626 if (Subtarget->isTargetCygMing()) {
1628 auto &
DL = CurDAG->getDataLayout();
1631 CLI.setChain(CurDAG->getRoot())
1633 CurDAG->getExternalSymbol(
"__main", TLI->getPointerTy(
DL)),
1637 CurDAG->setRoot(
Result.second);
1641 void X86DAGToDAGISel::emitFunctionEntryCode() {
1644 if (
F.hasExternalLinkage() &&
F.getName() ==
"main")
1645 emitSpecialCodeForMain();
1655 return isInt<31>(Val);
1658 bool X86DAGToDAGISel::foldOffsetIntoAddress(
uint64_t Offset,
1659 X86ISelAddressMode &AM) {
1664 int64_t Val = AM.Disp +
Offset;
1667 if (Val != 0 && (AM.ES || AM.MCSym))
1671 if (Subtarget->is64Bit()) {
1674 AM.hasSymbolicDisplacement()))
1678 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1687 bool X86DAGToDAGISel::matchLoadInAddress(
LoadSDNode *
N, X86ISelAddressMode &AM,
1688 bool AllowSegmentRegForX32) {
1701 if (
C->getSExtValue() == 0 && AM.Segment.getNode() ==
nullptr &&
1702 !IndirectTlsSegRefs &&
1703 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1704 Subtarget->isTargetFuchsia())) {
1705 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1707 switch (
N->getPointerInfo().getAddrSpace()) {
1726 bool X86DAGToDAGISel::matchWrapper(
SDValue N, X86ISelAddressMode &AM) {
1729 if (AM.hasSymbolicDisplacement())
1732 bool IsRIPRelTLS =
false;
1746 if (Subtarget->is64Bit() &&
1752 if (IsRIPRel && AM.hasBaseOrIndexReg())
1756 X86ISelAddressMode Backup = AM;
1761 AM.GV =
G->getGlobal();
1762 AM.SymbolFlags =
G->getTargetFlags();
1765 AM.CP =
CP->getConstVal();
1766 AM.Alignment =
CP->getAlign();
1767 AM.SymbolFlags =
CP->getTargetFlags();
1770 AM.ES =
S->getSymbol();
1771 AM.SymbolFlags =
S->getTargetFlags();
1772 }
else if (
auto *
S = dyn_cast<MCSymbolSDNode>(N0)) {
1773 AM.MCSym =
S->getMCSymbol();
1775 AM.JT = J->getIndex();
1776 AM.SymbolFlags = J->getTargetFlags();
1778 AM.BlockAddr = BA->getBlockAddress();
1779 AM.SymbolFlags = BA->getTargetFlags();
1780 Offset = BA->getOffset();
1784 if (foldOffsetIntoAddress(Offset, AM)) {
1790 AM.setBaseReg(CurDAG->getRegister(X86::RIP,
MVT::i64));
1798 bool X86DAGToDAGISel::matchAddress(
SDValue N, X86ISelAddressMode &AM) {
1799 if (matchAddressRecursively(
N, AM, 0))
1806 if (Subtarget->isTarget64BitILP32() &&
1807 AM.BaseType == X86ISelAddressMode::RegBase &&
1808 AM.Base_Reg.getNode() !=
nullptr && AM.IndexReg.getNode() ==
nullptr) {
1809 SDValue Save_Base_Reg = AM.Base_Reg;
1810 if (
auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
1812 if (matchLoadInAddress(LoadN, AM,
true))
1813 AM.Base_Reg = Save_Base_Reg;
1819 if (AM.Scale == 2 &&
1820 AM.BaseType == X86ISelAddressMode::RegBase &&
1821 AM.Base_Reg.getNode() ==
nullptr) {
1822 AM.Base_Reg = AM.IndexReg;
1829 switch (
TM.getCodeModel()) {
1833 if (Subtarget->is64Bit() &&
1835 AM.BaseType == X86ISelAddressMode::RegBase &&
1836 AM.Base_Reg.getNode() ==
nullptr &&
1837 AM.IndexReg.getNode() ==
nullptr &&
1839 AM.hasSymbolicDisplacement())
1840 AM.Base_Reg = CurDAG->getRegister(X86::RIP,
MVT::i64);
1847 bool X86DAGToDAGISel::matchAdd(
SDValue &
N, X86ISelAddressMode &AM,
1853 X86ISelAddressMode Backup = AM;
1854 if (!matchAddressRecursively(
N.getOperand(0), AM,
Depth+1) &&
1855 !matchAddressRecursively(Handle.getValue().getOperand(1), AM,
Depth+1))
1860 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
1862 !matchAddressRecursively(Handle.getValue().getOperand(0), AM,
Depth + 1))
1869 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1870 !AM.Base_Reg.getNode() &&
1871 !AM.IndexReg.getNode()) {
1872 N = Handle.getValue();
1873 AM.Base_Reg =
N.getOperand(0);
1874 AM.IndexReg =
N.getOperand(1);
1878 N = Handle.getValue();
1888 if (
N->getNodeId() == -1 ||
1908 X86ISelAddressMode &AM) {
1910 !isa<ConstantSDNode>(
Shift.getOperand(1)) ||
1914 int ScaleLog = 8 -
Shift.getConstantOperandVal(1);
1915 if (ScaleLog <= 0 || ScaleLog >= 4 ||
1916 Mask != (0xffu << ScaleLog))
1919 MVT VT =
N.getSimpleValueType();
1942 AM.Scale = (1 << ScaleLog);
1950 X86ISelAddressMode &AM) {
1956 int64_t
Mask = cast<ConstantSDNode>(
N->getOperand(1))->getSExtValue();
1961 bool FoundAnyExtend =
false;
1965 FoundAnyExtend =
true;
1970 !isa<ConstantSDNode>(
Shift.getOperand(1)))
1978 if (!
N.hasOneUse() || !
Shift.hasOneUse())
1982 unsigned ShiftAmt =
Shift.getConstantOperandVal(1);
1983 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1986 MVT VT =
N.getSimpleValueType();
1988 if (FoundAnyExtend) {
2009 AM.Scale = 1 << ShiftAmt;
2010 AM.IndexReg = NewAnd;
2044 X86ISelAddressMode &AM) {
2046 !isa<ConstantSDNode>(
Shift.getOperand(1)))
2049 unsigned ShiftAmt =
Shift.getConstantOperandVal(1);
2055 unsigned AMShiftAmt = MaskTZ;
2059 if (AMShiftAmt == 0 || AMShiftAmt > 3)
return true;
2066 unsigned ScaleDown = (64 -
X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2067 if (MaskLZ < ScaleDown)
2069 MaskLZ -= ScaleDown;
2077 bool ReplacingAnyExtend =
false;
2079 unsigned ExtendBits =
X.getSimpleValueType().getSizeInBits() -
2080 X.getOperand(0).getSimpleValueType().getSizeInBits();
2083 X =
X.getOperand(0);
2084 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2085 ReplacingAnyExtend =
true;
2087 APInt MaskedHighBits =
2090 if (MaskedHighBits != Known.
Zero)
return true;
2094 MVT VT =
N.getSimpleValueType();
2095 if (ReplacingAnyExtend) {
2096 assert(
X.getValueType() != VT);
2120 AM.Scale = 1 << AMShiftAmt;
2121 AM.IndexReg = NewSRL;
2131 X86ISelAddressMode &AM,
2134 !isa<ConstantSDNode>(
Shift.getOperand(1)) ||
2135 !
Shift.hasOneUse() || !
N.hasOneUse())
2139 if (!Subtarget.hasTBM() &&
2140 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
2146 unsigned ShiftAmt =
Shift.getConstantOperandVal(1);
2154 if (AMShiftAmt == 0 || AMShiftAmt > 3)
return true;
2156 MVT VT =
N.getSimpleValueType();
2179 AM.Scale = 1 << AMShiftAmt;
2180 AM.IndexReg = NewAnd;
2184 bool X86DAGToDAGISel::matchAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
2188 dbgs() <<
"MatchAddress: ";
2193 return matchAddressBase(
N, AM);
2198 if (AM.isRIPRelative()) {
2202 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2206 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2211 switch (
N.getOpcode()) {
2214 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
2215 if (
const auto *ESNode = dyn_cast<MCSymbolSDNode>(
N.getOperand(0))) {
2217 AM.MCSym = ESNode->getMCSymbol();
2223 uint64_t Val = cast<ConstantSDNode>(
N)->getSExtValue();
2224 if (!foldOffsetIntoAddress(Val, AM))
2231 if (!matchWrapper(
N, AM))
2236 if (!matchLoadInAddress(cast<LoadSDNode>(
N), AM))
2241 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2242 AM.Base_Reg.getNode() ==
nullptr &&
2244 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2245 AM.Base_FrameIndex = cast<FrameIndexSDNode>(
N)->getIndex();
2251 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
2255 unsigned Val = CN->getZExtValue();
2260 if (Val == 1 || Val == 2 || Val == 3) {
2261 AM.Scale = 1 << Val;
2267 if (CurDAG->isBaseWithConstantOffset(ShVal)) {
2271 if (!foldOffsetIntoAddress(Disp, AM))
2275 AM.IndexReg = ShVal;
2283 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
break;
2287 assert(
N.getSimpleValueType().getSizeInBits() <= 64 &&
2288 "Unexpected value size!");
2297 if (!isa<ConstantSDNode>(
N.getOperand(1)) ||
2298 !isa<ConstantSDNode>(
And.getOperand(1)))
2300 uint64_t Mask =
And.getConstantOperandVal(1) >>
N.getConstantOperandVal(1);
2312 if (
N.getResNo() != 0)
break;
2317 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2318 AM.Base_Reg.getNode() ==
nullptr &&
2319 AM.IndexReg.getNode() ==
nullptr) {
2321 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2322 CN->getZExtValue() == 9) {
2323 AM.Scale = unsigned(CN->getZExtValue())-1;
2337 if (foldOffsetIntoAddress(Disp, AM))
2338 Reg =
N.getOperand(0);
2340 Reg =
N.getOperand(0);
2343 AM.IndexReg = AM.Base_Reg =
Reg;
2362 X86ISelAddressMode Backup = AM;
2363 if (matchAddressRecursively(
N.getOperand(0), AM,
Depth+1)) {
2364 N = Handle.getValue();
2368 N = Handle.getValue();
2370 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2389 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2390 !AM.Base_Reg.getNode()->hasOneUse()) ||
2391 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2395 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2396 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2397 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2409 AM.NegateIndex =
true;
2415 if (!matchAdd(
N, AM,
Depth))
2426 if (CurDAG->haveNoCommonBitsSet(
N.getOperand(0),
N.getOperand(1)) &&
2444 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
break;
2448 assert(
N.getSimpleValueType().getSizeInBits() <= 64 &&
2449 "Unexpected value size!");
2451 if (!isa<ConstantSDNode>(
N.getOperand(1)))
2454 if (
N.getOperand(0).getOpcode() ==
ISD::SRL) {
2483 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
2485 if (
N.getOperand(0).getOpcode() !=
ISD::SHL || !
N.getOperand(0).hasOneUse())
2490 auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.
getOperand(1));
2491 if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2497 ShAmtC->getZExtValue());
2498 if (!CurDAG->MaskedValueIsZero(Shl.
getOperand(0), HighZeros))
2502 MVT VT =
N.getSimpleValueType();
2508 AM.Scale = 1 << ShAmtC->getZExtValue();
2513 CurDAG->ReplaceAllUsesWith(
N, NewShl);
2514 CurDAG->RemoveDeadNode(
N.getNode());
2519 return matchAddressBase(
N, AM);
2524 bool X86DAGToDAGISel::matchAddressBase(
SDValue N, X86ISelAddressMode &AM) {
2526 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2528 if (!AM.IndexReg.getNode()) {
2539 AM.BaseType = X86ISelAddressMode::RegBase;
2544 bool X86DAGToDAGISel::matchVectorAddressRecursively(
SDValue N,
2545 X86ISelAddressMode &AM,
2549 dbgs() <<
"MatchVectorAddress: ";
2554 return matchAddressBase(
N, AM);
2557 switch (
N.getOpcode()) {
2559 uint64_t Val = cast<ConstantSDNode>(
N)->getSExtValue();
2560 if (!foldOffsetIntoAddress(Val, AM))
2565 if (!matchWrapper(
N, AM))
2573 X86ISelAddressMode Backup = AM;
2574 if (!matchVectorAddressRecursively(
N.getOperand(0), AM,
Depth + 1) &&
2575 !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2581 if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2583 !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
2588 N = Handle.getValue();
2593 return matchAddressBase(
N, AM);
2599 bool X86DAGToDAGISel::matchVectorAddress(
SDValue N, X86ISelAddressMode &AM) {
2600 return matchVectorAddressRecursively(
N, AM, 0);
2608 X86ISelAddressMode AM;
2609 AM.IndexReg = IndexOp;
2610 AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
2624 if (matchVectorAddress(BasePtr, AM))
2627 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2641 X86ISelAddressMode AM;
2653 unsigned AddrSpace =
2654 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2665 MVT VT =
N.getSimpleValueType();
2667 if (matchAddress(
N, AM))
2670 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2680 N =
N.getOperand(0);
2692 cast<GlobalAddressSDNode>(
N)->getGlobal()->getAbsoluteSymbolRange();
2705 if (!selectLEAAddr(
N,
Base, Scale,
Index, Disp, Segment))
2709 if (
RN &&
RN->getReg() == 0)
2711 else if (
Base.getValueType() ==
MVT::i32 && !isa<FrameIndexSDNode>(
Base)) {
2715 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit,
DL,
MVT::i64, ImplDef,
2719 RN = dyn_cast<RegisterSDNode>(
Index);
2720 if (
RN &&
RN->getReg() == 0)
2724 "Expect to be extending 32-bit registers for use in LEA");
2727 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit,
DL,
MVT::i64, ImplDef,
2736 bool X86DAGToDAGISel::selectLEAAddr(
SDValue N,
2740 X86ISelAddressMode AM;
2744 MVT VT =
N.getSimpleValueType();
2751 if (matchAddress(
N, AM))
2756 unsigned Complexity = 0;
2757 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2759 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2762 if (AM.IndexReg.getNode())
2775 if (AM.hasSymbolicDisplacement()) {
2777 if (Subtarget->is64Bit())
2787 auto isMathWithFlags = [](
SDValue V) {
2788 switch (V.getOpcode()) {
2809 if (isMathWithFlags(
N.getOperand(0)) || isMathWithFlags(
N.getOperand(1)))
2817 if (Complexity <= 2)
2820 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2831 X86ISelAddressMode AM;
2836 if (Subtarget->is32Bit()) {
2841 MVT VT =
N.getSimpleValueType();
2842 getAddressOperands(AM,
SDLoc(
N), VT,
Base, Scale,
Index, Disp, Segment);
2850 EVT VT =
N.getValueType();
2851 bool WasTruncated =
false;
2853 WasTruncated =
true;
2854 N =
N.getOperand(0);
2863 unsigned Opc =
N.getOperand(0)->getOpcode();
2865 Op =
N.getOperand(0);
2868 return !WasTruncated;
2872 auto *GA = cast<GlobalAddressSDNode>(
N.getOperand(0));
2887 assert(Root &&
P &&
"Unknown root/parent nodes");
2889 !IsProfitableToFold(
N,
P, Root) ||
2890 !IsLegalToFold(
N,
P, Root, OptLevel))
2893 return selectAddr(
N.getNode(),
2894 N.getOperand(1),
Base, Scale,
Index, Disp, Segment);
2901 assert(Root &&
P &&
"Unknown root/parent nodes");
2903 !IsProfitableToFold(
N,
P, Root) ||
2904 !IsLegalToFold(
N,
P, Root, OptLevel))
2907 return selectAddr(
N.getNode(),
2908 N.getOperand(1),
Base, Scale,
Index, Disp, Segment);
2914 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2915 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2920 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(
unsigned Width,
SDNode *
N)
const {
2922 N =
N->getOperand(0).getNode();
2926 auto *GA = dyn_cast<GlobalAddressSDNode>(
N->getOperand(0));
2939 assert(
N->isMachineOpcode() &&
"Unexpected node");
2940 unsigned Opc =
N->getMachineOpcode();
2941 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
2946 return static_cast<X86::CondCode>(
N->getConstantOperandVal(CondNo));
2951 bool X86DAGToDAGISel::onlyUsesZeroFlag(
SDValue Flags)
const {
2956 if (UI.getUse().getResNo() != Flags.
getResNo())
2960 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2964 FlagUE = UI->
use_end(); FlagUI != FlagUE; ++FlagUI) {
2966 if (FlagUI.getUse().getResNo() != 1)
continue;
2968 if (!FlagUI->isMachineOpcode())
return false;
2987 bool X86DAGToDAGISel::hasNoSignFlagUses(
SDValue Flags)
const {
2992 if (UI.getUse().getResNo() != Flags.
getResNo())
2996 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3000 FlagUE = UI->
use_end(); FlagUI != FlagUE; ++FlagUI) {
3002 if (FlagUI.getUse().getResNo() != 1)
continue;
3004 if (!FlagUI->isMachineOpcode())
return false;
3043 bool X86DAGToDAGISel::hasNoCarryFlagUses(
SDValue Flags)
const {
3048 if (UI.getUse().getResNo() != Flags.
getResNo())
3051 unsigned UIOpc = UI->getOpcode();
3055 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3059 FlagUI != FlagUE; ++FlagUI) {
3061 if (FlagUI.getUse().getResNo() != 1)
3064 if (!FlagUI->isMachineOpcode())
3105 if (StoredVal.
getResNo() != 0)
return false;
3119 LoadNode = cast<LoadSDNode>(
Load);
3122 if (!
Load.hasOneUse())
3130 bool FoundLoad =
false;
3134 const unsigned int Max = 1024;
3176 if (Chain ==
Load.getValue(1)) {
3178 ChainOps.push_back(
Load.getOperand(0));
3182 if (
Op ==
Load.getValue(1)) {
3185 ChainOps.push_back(
Load.getOperand(0));
3188 LoopWorklist.push_back(
Op.getNode());
3189 ChainOps.push_back(
Op);
3198 if (
Op.getNode() != LoadNode)
3199 LoopWorklist.push_back(
Op.getNode());
// NOTE(review): garbled extraction fragment (interior lines missing; leading
// numerals are original-file line numbers). Logic must not be reconstructed
// from this text alone.
//
// Folds a (load; ALU op; store) triple into a single read-modify-write
// memory-form instruction (e.g. ADD64mr / INC32m / AND16mi8), selecting the
// opcode by operation kind and memory width.
3231 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(
SDNode *Node) {
3244 bool IsCommutable =
false;
3245 bool IsNegate =
false;
3259 IsCommutable =
true;
// For negate the load feeds operand 1; otherwise operand 0 is the load.
3263 unsigned LoadOpNo = IsNegate ? 1 : 0;
3267 LoadNode, InputChain)) {
3274 LoadNode, InputChain))
// Width dispatcher: picks the 64/32/16/8-bit variant of an opcode family.
3283 auto SelectOpcode = [&](
unsigned Opc64,
unsigned Opc32,
unsigned Opc16,
3304 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
// Prefer INC/DEC for +/-1 only when they are not slow on this subtarget
// (or when optimizing for size), and only if CF is provably unread —
// INC/DEC do not update the carry flag.
3314 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3318 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.
getValue(1))) {
3321 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3322 : SelectOpcode(
X86::DEC64m,
X86::DEC32m,
X86::DEC16m,
X86::DEC8m);
// Register-operand memory forms (op [mem], reg).
3335 auto SelectRegOpcode = [SelectOpcode](
unsigned Opc) {
3338 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3341 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3344 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3347 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3350 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3353 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3355 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
// Sign-extended 8-bit-immediate memory forms; 0 marks "no i8 form".
3361 auto SelectImm8Opcode = [SelectOpcode](
unsigned Opc) {
3364 return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3366 return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3368 return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3370 return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3372 return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3374 return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3376 return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
// Full-width-immediate memory forms (i32 immediate for the 64-bit ops).
3381 auto SelectImmOpcode = [SelectOpcode](
unsigned Opc) {
3384 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3387 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3390 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3393 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3396 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3399 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3402 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3409 unsigned NewOpc = SelectRegOpcode(Opc);
// Constant RHS: try to shrink to an immediate form.
3414 if (
auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3415 int64_t OperandV = OperandC->getSExtValue();
// Negating the constant (presumably to turn SUB into ADD or vice versa)
// is only legal when CF is unread — TODO confirm; context lines missing.
3424 hasNoCarryFlagUses(StoredVal.
getValue(1))) {
3425 OperandV = -OperandV;
3432 Operand = CurDAG->getTargetConstant(OperandV,
SDLoc(Node), MemVT);
3433 NewOpc = SelectImm8Opcode(Opc);
3435 Operand = CurDAG->getTargetConstant(OperandV,
SDLoc(Node), MemVT);
3436 NewOpc = SelectImmOpcode(Opc);
// ADC/SBB consume EFLAGS: thread the incoming flags via CopyToReg.
3442 CurDAG->getCopyToReg(InputChain,
SDLoc(Node), X86::EFLAGS,
3446 Segment, Operand, CopyTo, CopyTo.
getValue(1)};
3451 Segment, Operand, InputChain};
// Transfer memory operands so alias analysis still sees the accesses.
3463 CurDAG->setNodeMemRefs(Result, MemOps);
3469 CurDAG->RemoveDeadNode(Node);
// NOTE(review): garbled extraction fragment (interior lines missing; leading
// numerals are original-file line numbers). Treat as read-only reference.
//
// Matches "extract low NBits of X" patterns (and-mask / shift-after-clear
// forms A-D below) and lowers them to BMI2 BZHI or BMI1 BEXTR. Bails out
// early when neither BMI nor BMI2 is available.
3479 bool X86DAGToDAGISel::matchBitExtract(
SDNode *Node) {
3482 "Should be either an and-mask, or right-shift after clearing high bits.");
3485 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3488 MVT NVT = Node->getSimpleValueType(0);
// With BMI2 the pattern operands may keep extra uses by default (BZHI does
// not destroy its inputs the way the BEXTR fallback sequence would).
3499 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
// Use-count guard, overridable per call site via AllowExtraUses.
3500 auto checkUses = [AllowExtraUsesByDefault](
SDValue Op,
unsigned NUses,
3502 return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
3503 Op.getNode()->hasNUsesOfValue(NUses,
Op.getResNo());
3505 auto checkOneUse = [checkUses](
SDValue Op,
3507 return checkUses(
Op, 1, AllowExtraUses);
3509 auto checkTwoUse = [checkUses](
SDValue Op,
3511 return checkUses(
Op, 2, AllowExtraUses);
// Looks through a single-use i64->i32 truncate so 32-bit patterns over
// truncated 64-bit values still match.
3514 auto peekThroughOneUseTruncation = [checkOneUse](
SDValue V) {
3517 V.getOperand(0).getSimpleValueType() ==
MVT::i64 &&
3518 "Expected i64 -> i32 truncation");
3519 V = V.getOperand(0);
// Pattern A: mask is ((1 << NBits) - 1) — TODO confirm; the opcode checks
// for this lambda are among the missing lines.
3525 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
3534 SDValue M0 = peekThroughOneUseTruncation(
Mask->getOperand(0));
3539 NBits =
M0->getOperand(1);
3540 NegateNBits =
false;
// True iff V is known to be all-ones in the relevant bits.
3544 auto isAllOnes = [
this, peekThroughOneUseTruncation, NVT](
SDValue V) {
3545 V = peekThroughOneUseTruncation(V);
3546 return CurDAG->MaskedValueIsAllOnes(
// Pattern B: mask built from an all-ones value (visible checks: operand 1
// and the shifted operand 0 must both be all-ones).
3552 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3558 if (!isAllOnes(
Mask->getOperand(1)))
3561 SDValue M0 = peekThroughOneUseTruncation(
Mask->getOperand(0));
3565 if (!isAllOnes(
M0->getOperand(0)))
3567 NBits =
M0->getOperand(1);
3568 NegateNBits =
false;
// Canonicalizes the shift amount: strips wrappers and recognizes the
// (Bitwidth - Amt) form, recording it via NegateNBits.
3574 auto canonicalizeShiftAmt = [&NBits, &NegateNBits](
SDValue ShiftAmt,
3575 unsigned Bitwidth) {
3580 NBits = NBits.getOperand(0);
3585 auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
3586 if (!V0 || V0->getZExtValue() != Bitwidth)
3588 NBits = NBits.getOperand(1);
3589 NegateNBits =
false;
// Pattern C: mask is all-ones shifted right by (Bitwidth - NBits).
3595 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
3598 Mask = peekThroughOneUseTruncation(
Mask);
3599 unsigned Bitwidth =
Mask.getSimpleValueType().getSizeInBits();
3608 if (!checkOneUse(
M1))
3610 canonicalizeShiftAmt(
M1, Bitwidth);
// A still-negated amount is only acceptable here if it was resolved.
3615 return !NegateNBits;
// Pattern D: (X << (BW - NBits)) >> (BW - NBits) — shift-clear-shift form.
3623 auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
3624 AllowExtraUsesByDefault, &NegateNBits,
3628 SDValue N0 = Node->getOperand(0);
3632 SDValue N1 = Node->getOperand(1);
3637 canonicalizeShiftAmt(N1, Bitwidth);
// Extra uses are tolerable only when no negation fixup will be emitted.
3641 const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
3642 if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
3648 auto matchLowBitMask = [matchPatternA, matchPatternB,
3650 return matchPatternA(
Mask) || matchPatternB(
Mask) || matchPatternC(
Mask);
// AND root: try the mask on either operand; otherwise try pattern D.
3653 if (Node->getOpcode() ==
ISD::AND) {
3654 X = Node->getOperand(0);
3657 if (matchLowBitMask(
Mask)) {
3661 if (!matchLowBitMask(
Mask))
3664 }
else if (!matchPatternD(Node))
// Negated bit counts need BMI2 (BZHI); BEXTR alone cannot express them.
3669 if (NegateNBits && !Subtarget->hasBMI2())
// Widen the i8 bit count into i32 via IMPLICIT_DEF + INSERT_SUBREG so it
// can feed the BZHI/BEXTR control operand.
3681 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL,
MVT::i32), 0);
3686 NBits =
SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG,
DL,
3687 MVT::i32, ImplDef, NBits, SRIdxVal),
// BMI2 path: emit the extract and reselect the new node.
3701 if (Subtarget->hasBMI2()) {
3710 ReplaceNode(Node, Extract.
getNode());
3711 SelectCode(Extract.
getNode());
// BMI1/BEXTR path: recover the real X (through truncation) and fold a
// pre-existing right shift of X into the BEXTR start position.
3720 SDValue RealX = peekThroughOneUseTruncation(
X);
3726 MVT XVT =
X.getSimpleValueType();
3744 SDValue ShiftAmt =
X.getOperand(1);
3745 X =
X.getOperand(0);
3748 "Expected shift amount to be i8");
3752 SDValue OrigShiftAmt = ShiftAmt;
3776 ReplaceNode(Node, Extract.
getNode());
3777 SelectCode(Extract.
getNode());
// NOTE(review): garbled extraction fragment; the enclosing function's
// signature is among the missing lines. From the visible logic this selects
// a BEXTR(I)/BZHI for an (X >> Shift) & Mask with constant shift and mask —
// TODO confirm the exact entry point against the original file.
3784 MVT NVT = Node->getSimpleValueType(0);
3787 SDValue N0 = Node->getOperand(0);
3788 SDValue N1 = Node->getOperand(1);
// BEXTR is preferred only where it is actually fast (TBM, or BMI with the
// fast-BEXTR tuning flag); otherwise BMI2's BZHI is the fallback.
3797 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
3798 if (!PreferBEXTR && !Subtarget->hasBMI2())
// Both the shift amount and the mask must be constants.
3816 if (!MaskCst || !ShiftCst)
// shift 8 + 8-bit mask is presumably left to a cheaper byte-extract
// pattern — TODO confirm; surrounding comment lines are missing.
3829 if (
Shift == 8 && MaskSize == 8)
3840 if (!PreferBEXTR && MaskSize <= 32)
3844 unsigned ROpc, MOpc;
// BZHI path: control value is the end bit position (Shift + MaskSize).
3847 assert(Subtarget->hasBMI2() &&
"We must have BMI2's BZHI then.");
3851 Control = CurDAG->getTargetConstant(
Shift + MaskSize, dl, NVT);
3852 ROpc = NVT ==
MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
3853 MOpc = NVT ==
MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
3854 unsigned NewOpc = NVT ==
MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
// BEXTR path: control packs start (low byte) and length (second byte).
3861 Control = CurDAG->getTargetConstant(
Shift | (MaskSize << 8), dl, NVT);
// TBM has an immediate form; plain BMI1 BEXTR takes the control in a reg.
3862 if (Subtarget->hasTBM()) {
3863 ROpc = NVT ==
MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3864 MOpc = NVT ==
MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3866 assert(Subtarget->hasBMI() &&
"We must have BMI1's BEXTR then.");
3868 ROpc = NVT ==
MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3869 MOpc = NVT ==
MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3870 unsigned NewOpc = NVT ==
MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
// Fold a feeding load into the memory form when possible.
3877 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3878 if (tryFoldLoad(Node, N0.
getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3880 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
Control,
Input.getOperand(0)};
3882 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Preserve the folded load's memory operand for alias analysis.
3886 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3888 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT,
MVT::i32, Input,
Control);
// BZHI zeroes from the top, so a trailing SHR restores the shifted result.
3893 SDValue ShAmt = CurDAG->getTargetConstant(
Shift, dl, NVT);
3894 unsigned NewOpc = NVT ==
MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
3896 CurDAG->getMachineNode(NewOpc, dl, NVT,
SDValue(NewNode, 0), ShAmt);
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Emits a PCMPISTR* machine node for the given reg (ROpc) / mem (MOpc)
// opcodes, folding operand 1's load when MayFoldLoad allows it.
3903 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(
unsigned ROpc,
unsigned MOpc,
3904 bool MayFoldLoad,
const SDLoc &dl,
3906 SDValue N0 = Node->getOperand(0);
3907 SDValue N1 = Node->getOperand(1);
// Re-wrap the immediate as a target constant of the same type.
3909 const ConstantInt *Val = cast<ConstantSDNode>(
Imm)->getConstantIntValue();
3910 Imm = CurDAG->getTargetConstant(*Val,
SDLoc(Node),
Imm.getValueType());
// Memory form: fold the load feeding N1 into the instruction.
3913 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3914 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3915 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
Imm,
3918 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Keep the folded load's memory operand attached.
3922 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
// Register form fallback.
3928 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Emits a PCMPESTR* machine node; mirrors emitPCMPISTR above but the
// memory candidate is operand 2 (PCMPESTR's explicit-length form).
3935 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(
unsigned ROpc,
unsigned MOpc,
3936 bool MayFoldLoad,
const SDLoc &dl,
3939 SDValue N0 = Node->getOperand(0);
3940 SDValue N2 = Node->getOperand(2);
// Re-wrap the immediate as a target constant of the same type.
3942 const ConstantInt *Val = cast<ConstantSDNode>(
Imm)->getConstantIntValue();
3943 Imm = CurDAG->getTargetConstant(*Val,
SDLoc(Node),
Imm.getValueType());
// Memory form: fold the load feeding N2 into the instruction.
3946 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3947 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3948 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
Imm,
3951 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Keep the folded load's memory operand attached.
3956 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
// Register form fallback.
3962 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Simplifies a shift node's amount operand using the hardware's implicit
// masking of shift counts (amounts are taken mod the operand size), e.g.
// dropping an ADD of a multiple of Size from the amount.
3967 bool X86DAGToDAGISel::tryShiftAmountMod(
SDNode *
N) {
3968 EVT VT =
N->getValueType(0);
3977 SDValue OrigShiftAmt =
N->getOperand(1);
3978 SDValue ShiftAmt = OrigShiftAmt;
3992 auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
3993 auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
// amt = X + k where k % Size == 0: the constant is a no-op modulo Size.
3996 if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
4001 Add0C->getZExtValue() != 0) {
4004 if (Add0C->getZExtValue() % Size == 0)
// 64-bit special case: a one-use amount with a multiple-of-32 constant
// can still be handled — TODO confirm; the transform body is missing.
4006 else if (ShiftAmt.
hasOneUse() && Size == 64 &&
4007 Add0C->getZExtValue() % 32 == 0) {
// Rebuild the amount arithmetic in the narrow type (SubVT).
4015 Add0 = CurDAG->getZExtOrTrunc(Add0,
DL, SubVT);
4019 X = CurDAG->getNode(
ISD::ADD,
DL, SubVT, Add1, Add0);
4026 SDValue Zero = CurDAG->getConstant(0,
DL, SubVT);
// Mask the new amount with Size-1 (the hardware's implicit modulus).
4049 CurDAG->getConstant(Size - 1,
DL,
MVT::i8));
// Swap in the simplified amount; UpdateNodeOperands may CSE to a new node.
4053 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(
N,
N->getOperand(0),
4055 if (UpdatedNode !=
N) {
4058 ReplaceNode(
N, UpdatedNode);
// Drop the original amount if this was its last use.
4065 CurDAG->RemoveDeadNode(OrigShiftAmt.
getNode());
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// For (logic-op (shl X, ShAmt), C) rewrites to (shl (logic-op X, C>>ShAmt),
// ShAmt) when the shrunken constant is cheaper to encode; for non-AND ops
// this is only legal when no payload bits are discarded.
4073 bool X86DAGToDAGISel::tryShrinkShlLogicImm(
SDNode *
N) {
4074 MVT NVT =
N->getSimpleValueType(0);
4075 unsigned Opcode =
N->getOpcode();
4092 bool FoundAnyExtend =
false;
4096 FoundAnyExtend =
true;
// Bits of C below the shift amount would be lost by the rewrite; for ops
// other than AND that changes the result, so bail if any are set.
4115 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4116 if (Opcode !=
ISD::AND && (Val & RemovedBitsMask) != 0)
// Decide whether shifting the constant actually yields a cheaper
// immediate encoding (visible guards: UINT8_MAX/UINT16_MAX sentinels).
4121 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4125 ShiftedVal = (
uint64_t)Val >> ShAmt;
4129 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4132 ShiftedVal = Val >> ShAmt;
4138 ShiftedVal = (
uint64_t)Val >> ShAmt;
4146 if (!CanShrinkImmediate(ShiftedVal))
4164 if (CurDAG->MaskedValueIsZero(
N->getOperand(0), NeededMask))
// Re-materialize the any-extend detected earlier, if there was one.
4169 if (FoundAnyExtend) {
// Emit the logic op on the unshifted value, then redo the original shift.
4175 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4177 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT,
X, NewCst);
4180 Shift.getOperand(1));
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Builds a VPTERNLOG machine node computing the 3-input boolean function
// described by the 8-bit truth-table immediate Imm over A, B, C. One
// operand may be folded as a load or a 32/64-bit broadcast; when A or B is
// the folded one, Imm's bits are permuted so C stays the memory operand.
4186 bool X86DAGToDAGISel::matchVPTERNLOG(
SDNode *Root,
SDNode *ParentA,
4190 assert(
A.isOperandOf(ParentA) &&
B.isOperandOf(ParentB) &&
4191 C.isOperandOf(ParentC) &&
"Incorrect parent node");
// Try a plain load fold first, then a broadcast-load fold (32/64-bit
// elements only, per the Size check below).
4193 auto tryFoldLoadOrBCast =
4196 if (tryFoldLoad(Root,
P, L,
Base, Scale,
Index, Disp, Segment))
4202 L = L.getOperand(0);
4209 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4210 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4211 if (Size != 32 && Size != 64)
4214 return tryFoldBroadcast(Root,
P, L,
Base, Scale,
Index, Disp, Segment);
4217 bool FoldedLoad =
false;
4218 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Prefer folding C (already the memory position of VPTERNLOG).
4219 if (tryFoldLoadOrBCast(Root, ParentC,
C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4221 }
else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
// Folding A: swap A<->C in the truth table. A selects bit weight 0xf0,
// C selects 0xaa; the fixed bits (0xa5) are shared, and the four moved
// bit positions are exchanged pairwise below.
4226 uint8_t OldImm =
Imm;
4227 Imm = OldImm & 0xa5;
4228 if (OldImm & 0x02)
Imm |= 0x10;
4229 if (OldImm & 0x10)
Imm |= 0x02;
4230 if (OldImm & 0x08)
Imm |= 0x40;
4231 if (OldImm & 0x40)
Imm |= 0x08;
4232 }
else if (tryFoldLoadOrBCast(Root, ParentB,
B, Tmp0, Tmp1, Tmp2, Tmp3,
// Folding B: swap B<->C in the truth table (fixed bits 0x99).
4237 uint8_t OldImm =
Imm;
4238 Imm = OldImm & 0x99;
4239 if (OldImm & 0x02)
Imm |= 0x04;
4240 if (OldImm & 0x04)
Imm |= 0x02;
4241 if (OldImm & 0x20)
Imm |= 0x40;
4242 if (OldImm & 0x40)
Imm |= 0x20;
// Broadcast-folded path: pick the D (32-bit) or Q (64-bit) rmb opcode by
// the broadcast element size and vector width.
4257 auto *MemIntr = cast<MemIntrinsicSDNode>(
C);
4258 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4259 assert((EltSize == 32 || EltSize == 64) &&
"Unexpected broadcast size!");
4261 bool UseD = EltSize == 32;
4263 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4265 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4267 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
// Plain-load-folded path (rmi forms).
4273 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4275 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4277 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4283 MNode = CurDAG->getMachineNode(Opc,
DL, VTs, Ops);
// Rewire the folded memory node's chain and transfer its mem operand.
4286 ReplaceUses(
C.getValue(1),
SDValue(MNode, 1));
4288 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(
C)->getMemOperand()});
// Register-only path (rri forms).
4293 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4295 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4297 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4301 MNode = CurDAG->getMachineNode(Opc,
DL, NVT, {
A,
B,
C, TImm});
4305 CurDAG->RemoveDeadNode(Root);
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Recognizes a pair of nested vector logic ops (AND/OR/XOR, possibly under
// NOTs) on AVX-512 targets and funnels them into matchVPTERNLOG with a
// truth-table immediate composed from per-operand "magic" masks.
4311 bool X86DAGToDAGISel::tryVPTERNLOG(
SDNode *
N) {
4312 MVT NVT =
N->getSimpleValueType(0);
// Only AVX-512 vectors qualify.
4315 if (!NVT.
isVector() || !Subtarget->hasAVX512() ||
// An inner logic op is foldable only if it has a single use (so no other
// consumer observes the intermediate value).
4326 auto getFoldableLogicOp = [](
SDValue Op) {
4329 Op =
Op.getOperand(0);
4331 if (!
Op.hasOneUse())
4334 unsigned Opc =
Op.getOpcode();
// Try either side of N as the foldable inner op.
4343 if ((FoldableOp = getFoldableLogicOp(N1))) {
4345 }
else if ((FoldableOp = getFoldableLogicOp(N0))) {
// Truth-table weights: bit patterns selecting inputs A, B, C of the
// 3-input function (standard ternary-logic magic constants).
4358 uint8_t TernlogMagicA = 0xf0;
4359 uint8_t TernlogMagicB = 0xcc;
4360 uint8_t TernlogMagicC = 0xaa;
// Peel a NOT off an operand, remembering its parent node and flipping the
// corresponding magic bits — TODO confirm; the lambda head is missing.
4369 Parent =
Op.getNode();
4370 Op =
Op.getOperand(0);
4374 PeekThroughNot(A, ParentA, TernlogMagicA);
4375 PeekThroughNot(
B, ParentB, TernlogMagicB);
4376 PeekThroughNot(
C, ParentC, TernlogMagicC);
// Inner op builds the B-op-C part of the table...
4381 case ISD::AND:
Imm = TernlogMagicB & TernlogMagicC;
break;
4382 case ISD::OR:
Imm = TernlogMagicB | TernlogMagicC;
break;
4383 case ISD::XOR:
Imm = TernlogMagicB ^ TernlogMagicC;
break;
// ...then the outer op combines it with A (ANDN clears, etc.).
4387 switch (
N->getOpcode()) {
4391 Imm &= ~TernlogMagicA;
4393 Imm = ~(
Imm) & TernlogMagicA;
4400 return matchVPTERNLOG(
N, ParentA, ParentB, ParentC, A,
B,
C,
Imm);
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Replaces an AND mask that has leading zeros with a "negative"-style mask
// (high bits set) when the other operand is known zero in those bits —
// presumably to get a shorter sign-extended-immediate encoding; TODO
// confirm, the explanatory comment lines are missing.
4410 bool X86DAGToDAGISel::shrinkAndImmediate(
SDNode *And) {
4413 MVT VT =
And->getSimpleValueType(0);
4417 auto *And1C = dyn_cast<ConstantSDNode>(
And->getOperand(1));
4426 APInt MaskVal = And1C->getAPIntValue();
// No leading zeros => nothing to gain; i64 with exactly 32 LZ is already
// a 32-bit-immediate case.
4428 if (!MaskLZ || (VT ==
MVT::i64 && MaskLZ == 32))
// For i64 masks that fit in 32 bits, work in the narrow width.
4432 if (VT ==
MVT::i64 && MaskLZ >= 32) {
4434 MaskVal = MaskVal.
trunc(32);
// Candidate replacement: set the formerly-zero high bits in the mask.
4439 APInt NegMaskVal = MaskVal | HighZeros;
// Widen back to 64 bits if we truncated above.
4449 NegMaskVal = NegMaskVal.
zext(64);
4450 HighZeros = HighZeros.
zext(64);
// Legal only if the other AND input is already zero in the high bits.
4455 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
// If the new mask is all-ones the AND is redundant — drop it entirely.
4461 ReplaceNode(And, And0.
getNode())
4466 SDValue NewMask = CurDAG->getConstant(NegMaskVal,
SDLoc(And), VT);
4469 ReplaceNode(And, NewAnd.
getNode());
// NOTE(review): garbled extraction fragment — the function's opening
// signature line is missing (this appears to be the tail of the
// getVPTESTMOpc opcode-table helper; TODO confirm). The X-macro tables
// below map vector VTs to VPTESTM/VPTESTNM opcode variants; comments are
// intentionally kept outside the #define bodies so no continuation line
// (trailing '\') is disturbed.
4475 bool FoldedBCast,
bool Masked) {
// Per-VT case: picks the masked ("##k") or unmasked, testn or test form.
4476 #define VPTESTM_CASE(VT, SUFFIX) \
4479 return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
4480 return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
// 32/64-bit-element VTs — the only ones with broadcast (rmb) forms.
4483 #define VPTESTM_BROADCAST_CASES(SUFFIX) \
4484 default: llvm_unreachable("Unexpected VT!"); \
4485 VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
4486 VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
4487 VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
4488 VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
4489 VPTESTM_CASE(v16i32, DZ##SUFFIX) \
4490 VPTESTM_CASE(v8i64, QZ##SUFFIX)
// Full table adds the 8/16-bit-element VTs (no broadcast variants).
4492 #define VPTESTM_FULL_CASES(SUFFIX) \
4493 VPTESTM_BROADCAST_CASES(SUFFIX) \
4494 VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
4495 VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
4496 VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
4497 VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
4498 VPTESTM_CASE(v64i8, BZ##SUFFIX) \
4499 VPTESTM_CASE(v32i16, WZ##SUFFIX)
// Macros are file-local; undefine to keep them out of the rest of the TU.
4517 #undef VPTESTM_FULL_CASES
4518 #undef VPTESTM_BROADCAST_CASES
// NOTE(review): garbled extraction fragment (interior lines missing).
//
// Lowers a vector compare-against-zero (setcc) into an AVX-512
// VPTESTM/VPTESTNM producing a mask register, optionally folding one
// source as a load/broadcast and optionally AND-ing with an incoming mask.
4524 bool X86DAGToDAGISel::tryVPTESTM(
SDNode *Root,
SDValue Setcc,
4526 assert(Subtarget->hasAVX512() &&
"Expected AVX512!");
// Load/broadcast folding helper (same shape as in matchVPTERNLOG): plain
// load first, then a broadcast whose element size matches CmpSVT.
4576 if (tryFoldLoad(Root,
P, L,
Base, Scale,
Index, Disp, Segment))
4587 L = L.getOperand(0);
4593 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4594 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.
getSizeInBits())
4597 return tryFoldBroadcast(Root,
P, L,
Base, Scale,
Index, Disp, Segment);
// Identical sources (x AND x) leave nothing to fold.
4601 bool CanFoldLoads = Src0 != Src1;
4603 bool FoldedLoad =
false;
4604 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Try folding Src1; if that fails the sources are commuted and Src0 is
// tried — TODO confirm; the commute lines are missing here.
4606 FoldedLoad = tryFoldLoadOrBCast(Root, N0.
getNode(), Src1, Tmp0, Tmp1, Tmp2,
4610 FoldedLoad = tryFoldLoadOrBCast(Root, N0.
getNode(), Src0, Tmp0, Tmp1,
4619 bool IsMasked = InMask.
getNode() !=
nullptr;
// Sub-512-bit vectors are widened into a full register via IMPLICIT_DEF +
// INSERT_SUBREG so the Z-register opcode forms can be used.
4632 SDValue ImplDef =
SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4634 Src0 = CurDAG->getTargetInsertSubreg(
SubReg, dl, CmpVT, ImplDef, Src0);
4637 Src1 = CurDAG->getTargetInsertSubreg(
SubReg, dl, CmpVT, ImplDef, Src1);
// The incoming mask must be in the matching mask register class.
4643 InMask =
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4644 dl, MaskVT, InMask, RC), 0);
4649 unsigned Opc =
getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
// Folded-memory forms: masked variant takes InMask first.
4657 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4659 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4661 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4663 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
// Transfer the folded memory operand for alias analysis.
4669 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
// Register-register forms.
4672 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4674 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
// If the result was computed in a wider mask class, copy it back down.
4681 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4682 dl, ResVT,
SDValue(CNode, 0), RC);
4686 CurDAG->RemoveDeadNode(Root);
// NOTE(review): garbled extraction fragment — only the guard clauses of this
// function survive; the matching/lowering body is missing. By its name it
// matches a bitwise-select idiom (presumably lowered via VPTERNLOG, given
// the AVX-512 guard) — TODO confirm against the original file.
4692 bool X86DAGToDAGISel::tryMatchBitSelect(
SDNode *
N) {
4695 MVT NVT =
N->getSimpleValueType(0);
// Only AVX-512 vector types are handled.
4698 if (!NVT.
isVector() || !Subtarget->hasAVX512())
4741 MVT NVT = Node->getSimpleValueType(0);
4742 unsigned Opcode = Node->getOpcode();
4745 if (Node->isMachineOpcode()) {
4747 Node->setNodeId(-1);
4754 unsigned IntNo = Node->getConstantOperandVal(1);
4757 case Intrinsic::x86_encodekey128:
4758 case Intrinsic::x86_encodekey256: {
4759 if (!Subtarget->hasKL())
4765 case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128;
break;
4766 case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256;
break;
4769 SDValue Chain = Node->getOperand(0);
4770 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
4772 if (Opcode == X86::ENCODEKEY256)
4773 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
4777 Opcode, dl, Node->getVTList(),
4778 {Node->getOperand(2), Chain, Chain.getValue(1)});
4779 ReplaceNode(Node, Res);
4782 case Intrinsic::x86_tileloadd64_internal:
4783 case Intrinsic::x86_tileloaddt164_internal: {
4784 if (!Subtarget->hasAMXTILE())
4786 unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
4788 : X86::PTILELOADDT1V;
4791 SDValue Scale = getI8Imm(1, dl);
4795 SDValue Chain = Node->getOperand(0);
4797 SDValue Ops[] = {Node->getOperand(2),
4798 Node->getOperand(3),
4806 ReplaceNode(Node, CNode);
4813 unsigned IntNo = Node->getConstantOperandVal(1);
4816 case Intrinsic::x86_sse3_monitor:
4817 case Intrinsic::x86_monitorx:
4818 case Intrinsic::x86_clzero: {
4819 bool Use64BitPtr = Node->getOperand(2).getValueType() ==
MVT::i64;
4824 case Intrinsic::x86_sse3_monitor:
4825 if (!Subtarget->hasSSE3())
4827 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4829 case Intrinsic::x86_monitorx:
4830 if (!Subtarget->hasMWAITX())
4832 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4834 case Intrinsic::x86_clzero:
4835 if (!Subtarget->hasCLZERO())
4837 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4842 unsigned PtrReg = Use64BitPtr ? X86::RAX :
X86::EAX;
4843 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4844 Node->getOperand(2),
SDValue());
4847 if (IntNo == Intrinsic::x86_sse3_monitor ||
4848 IntNo == Intrinsic::x86_monitorx) {
4850 Chain = CurDAG->getCopyToReg(Chain, dl,
X86::ECX, Node->getOperand(3),
4853 Chain = CurDAG->getCopyToReg(Chain, dl,
X86::EDX, Node->getOperand(4),
4860 ReplaceNode(Node, CNode);
4866 case Intrinsic::x86_tilestored64_internal: {
4867 unsigned Opc = X86::PTILESTOREDV;
4870 SDValue Scale = getI8Imm(1, dl);
4874 SDValue Chain = Node->getOperand(0);
4876 SDValue Ops[] = {Node->getOperand(2),
4877 Node->getOperand(3),
4883 Node->getOperand(6),
4885 CNode = CurDAG->getMachineNode(Opc, dl,
MVT::Other, Ops);
4886 ReplaceNode(Node, CNode);
4889 case Intrinsic::x86_tileloadd64:
4890 case Intrinsic::x86_tileloaddt164:
4891 case Intrinsic::x86_tilestored64: {
4892 if (!Subtarget->hasAMXTILE())
4897 case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD;
break;
4898 case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1;
break;
4899 case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED;
break;
4902 unsigned TIndex = Node->getConstantOperandVal(2);
4903 SDValue TReg = getI8Imm(TIndex, dl);
4905 SDValue Scale = getI8Imm(1, dl);
4909 SDValue Chain = Node->getOperand(0);
4911 if (Opc == X86::PTILESTORED) {
4913 CNode = CurDAG->getMachineNode(Opc, dl,
MVT::Other, Ops);
4916 CNode = CurDAG->getMachineNode(Opc, dl,
MVT::Other, Ops);
4918 ReplaceNode(Node, CNode);
4926 if (Subtarget->isTargetNaCl())
4930 if (Subtarget->isTarget64BitILP32()) {
4938 Node->getOperand(0), ZextTarget);
4939 ReplaceNode(Node, Brind.
getNode());
4940 SelectCode(ZextTarget.
getNode());
4947 ReplaceNode(Node, getGlobalBaseReg());
4954 ReplaceUses(
SDValue(Node, 0), Node->getOperand(0));
4955 CurDAG->RemoveDeadNode(Node);
4961 if (matchBitExtract(Node))
4966 if (tryShiftAmountMod(Node))
4971 uint8_t
Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
4972 if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
4973 Node->getOperand(1), Node->getOperand(2),
Imm))
4979 if (tryVPTERNLOG(Node))
4986 SDValue N0 = Node->getOperand(0);
4987 SDValue N1 = Node->getOperand(1);
4989 tryVPTESTM(Node, N0, N1))
4992 tryVPTESTM(Node, N1, N0))
4998 CurDAG->RemoveDeadNode(Node);
5001 if (matchBitExtract(Node))
5009 if (tryShrinkShlLogicImm(Node))
5011 if (Opcode ==
ISD::OR && tryMatchBitSelect(Node))
5013 if (tryVPTERNLOG(Node))
5025 if (!CurDAG->shouldOptForSize())
5032 SDValue N0 = Node->getOperand(0);
5033 SDValue N1 = Node->getOperand(1);
5047 if (Opcode ==
ISD::ADD && (Val == 1 || Val == -1))
5051 if (!shouldAvoidImmediateInstFormsForSize(N1.
getNode()))
5055 unsigned ROpc, MOpc;
5061 case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm;
break;
5062 case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm;
break;
5063 case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm;
break;
5064 case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm;
break;
5065 case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm;
break;
5071 case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm;
break;
5072 case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm;
break;
5073 case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm;
break;
5074 case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm;
break;
5075 case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm;
break;
5081 case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm;
break;
5082 case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm;
break;
5083 case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm;
break;
5084 case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm;
break;
5085 case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm;
break;
5091 case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm;
break;
5092 case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm;
break;
5093 case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm;
break;
5094 case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm;
break;
5095 case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm;
break;
5104 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;