#include "llvm/Config/llvm-config.h"
#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86-isel"
#define PASS_NAME "X86 DAG->DAG Instruction Selection"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

    cl::desc("Enable setting constant bits to reduce size of mask immediates"),

    "x86-promote-anyext-load", cl::init(true),
  struct X86ISelAddressMode {
    int Base_FrameIndex = 0;
    const char *ES = nullptr;
    bool NegateIndex = false;

    X86ISelAddressMode() = default;

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase)
        return false;
      if (RegisterSDNode *RegNode =
              dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
      dbgs() << " Scale " << Scale << '\n'
             << "IndexReg ";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
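      // For orientation (illustrative note, not upstream text): every match
      // below tries to express an address in the hardware's canonical form
      //   Segment:[Base_Reg + Scale*IndexReg + Disp],  Scale in {1,2,4,8}.
      // E.g. a 4-byte array access `a[i]` typically becomes
      // [Base + 4*Index + 0], i.e. `movl (%rax,%rcx,4), %edx`.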
    bool IndirectTlsSegRefs;

    X86DAGToDAGISel() = delete;

          OptForMinSize(false), IndirectTlsSegRefs(false) {}

          "indirect-tls-seg-refs");

             "OptForMinSize implies OptForSize");
#include "X86GenDAGISel.inc"

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                            bool AllowSegmentRegForX32 = false);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                       unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);

      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);

    bool isProfitableToFormMaskedOp(SDNode *N) const;

                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   MVT VT, SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
        Base = CurDAG->getTargetFrameIndex(
            AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
      else if (AM.Base_Reg.getNode())
        Base = AM.Base_Reg;
      else
        Base = CurDAG->getRegister(0, VT);

      Scale = getI8Imm(AM.Scale, DL);

#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
      if (AM.NegateIndex) {

      if (AM.IndexReg.getNode())
        Index = AM.IndexReg;
      else
        Index = CurDAG->getRegister(0, VT);

        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
                                             AM.Disp, AM.SymbolFlags);
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getSignedTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i16);
    bool isAMXSDNode(SDNode *N) const {
      switch (N->getOpcode()) {
      case X86::PT2RPNTLVWZ0V:
      case X86::PT2RPNTLVWZ0T1V:
      case X86::PT2RPNTLVWZ1V:
      case X86::PT2RPNTLVWZ1T1V:
      case X86::PT2RPNTLVWZ0RSV:
      case X86::PT2RPNTLVWZ0RST1V:
      case X86::PT2RPNTLVWZ1RSV:
      case X86::PT2RPNTLVWZ1RST1V:
      for (unsigned Idx = 0, E = N->getNumValues(); Idx != E; ++Idx) {
        if (N->getValueType(Idx) == MVT::x86amx)
          return true;
      for (unsigned Idx = 0, E = N->getNumOperands(); Idx != E; ++Idx) {
        SDValue Op = N->getOperand(Idx);
        if (Op.getValueType() == MVT::x86amx)
          return true;

    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      if (!CurDAG->shouldOptForSize())
        if (User->isMachineOpcode()) {
          auto *C = dyn_cast<ConstantSDNode>(N);
          if (C && isInt<8>(C->getSExtValue()))
            (RegNode = dyn_cast_or_null<RegisterSDNode>(
          if ((RegNode->getReg() == X86::ESP) ||
              (RegNode->getReg() == X86::RSP))
      return (UseCount > 1);
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
      return CurDAG->getTargetConstant(Imm, DL, MVT::i64);

      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      MVT VecVT = N->getOperand(0).getSimpleValueType();

      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      MVT VecVT = N->getSimpleValueType(0);

    SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
                                               const SDLoc &DL) {
      assert(VecWidth == 128 && "Unexpected vector width");
      MVT VecVT = N->getSimpleValueType(0);
      assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
      return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);

      MVT VT = N->getSimpleValueType(0);
      SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
          SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, {}), 0);
      if (VT == MVT::i64) {
            CurDAG->getMachineNode(
                TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
                CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),

      unsigned Opcode = N->getOpcode();
             "Unexpected opcode for SBB materialization");
      unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
                               N->getOperand(FlagOpIndex), SDValue());

      unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
      MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
      VTs = CurDAG->getVTList(SBBVT, MVT::i32);
          CurDAG->getMachineNode(Opc, dl, VTs,
                                 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),

    bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
      const APInt &Val = N->getConstantOperandAPInt(1);
      APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
      return Mask.countr_one() >= Width;
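      // Worked example: a 32-bit shift only consumes the low 5 bits of its
      // amount (Width == 5). For `shl x, (amt & 31)` the mask Val is 31
      // (0b11111), so Mask.countr_one() >= 5 and the AND is redundant.
      // Known-zero bits of the operand count toward the trailing ones too,
      // which is why computeKnownBits().Zero is OR'ed in above.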
    SDNode *getGlobalBaseReg();

      return Subtarget->getInstrInfo();

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

      if (!N->isNonTemporal())
        return false;

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlign().value() < StoreSize)
        return false;

        return Subtarget->hasSSE41();
        return Subtarget->hasAVX2();
        return Subtarget->hasAVX512();

    bool foldLoadStoreIntoMemOperand(SDNode *Node);
    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
    bool tryShrinkShlLogicImm(SDNode *N);
    bool tryMatchBitSelect(SDNode *N);

    MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node);
    MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node,
                                SDValue &InGlue);

    bool tryOptimizeRem8Extend(SDNode *N);

    bool onlyUsesZeroFlag(SDValue Flags) const;
    bool hasNoSignFlagUses(SDValue Flags) const;
    bool hasNoCarryFlagUses(SDValue Flags) const;
            ID, std::make_unique<X86DAGToDAGISel>(tm, OptLevel)) {}

char X86DAGToDAGISelLegacy::ID = 0;

  unsigned Opcode = N->getOpcode();
  EVT OpVT = N->getOperand(0).getValueType();
    OpVT = N->getOperand(1).getValueType();
    return Subtarget->hasVLX();
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {

  if (OptLevel == CodeGenOptLevel::None)
    return false;

  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
    return false;

  switch (U->getOpcode()) {
    if (auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
      if (Imm->getAPIntValue().isSignedIntN(8))
        return false;
          Imm->getAPIntValue().getBitWidth() == 64 &&
          Imm->getAPIntValue().isIntN(32))
        return false;
          (Imm->getAPIntValue() == UINT8_MAX ||
           Imm->getAPIntValue() == UINT16_MAX ||
           Imm->getAPIntValue() == UINT32_MAX))
        return false;
          (-Imm->getAPIntValue()).isSignedIntN(8))
        return false;
          (-Imm->getAPIntValue()).isSignedIntN(8) &&
          hasNoCarryFlagUses(SDValue(U, 1)))
        return false;

    if (U->getOperand(0).getOpcode() == ISD::SHL &&
    if (U->getOperand(1).getOpcode() == ISD::SHL &&
      auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
      if (C && C->getSExtValue() == -2)
        return false;
      auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
      if (C && C->getSExtValue() == -2)
        return false;
    if (isa<ConstantSDNode>(U->getOperand(1)))
      return false;
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
         "Unexpected opcode!");

  return N->getOperand(1).hasOneUse();

  if (Chain.getNode() == Load.getNode())
         "Unexpected chain operand");
                     Load.getOperand(1), Load.getOperand(2));
  Ops.append(Call->op_begin() + 1, Call->op_end());

  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
  auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
      Callee.getValue(1).hasOneUse())

  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;

  uint8_t OptionalPrefixBytes[] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
                                   0x65, 0x66, 0x67, 0xf0, 0xf2};
    uint8_t Byte = (Imm >> i) & 0xFF;
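  // Illustrative note: ENDBR64 encodes as F3 0F 1E FA, so the check above
  // looks for 0x0F1EFA in the low three bytes and then requires every
  // remaining byte of the immediate to be one of the optional legacy
  // prefixes in OptionalPrefixBytes (segment overrides, operand/address
  // size, LOCK, REPNE). Such an immediate must not be materialized verbatim
  // when CF branch protection is enabled, or it would form a gadget.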
  return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);

void X86DAGToDAGISel::PreprocessISelDAG() {
  bool MadeChange = false;
                                       E = CurDAG->allnodes_end(); I != E;) {

      MVT VT = N->getSimpleValueType(0);
      int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
      int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
              "cf-protection-branch");

        SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
        Complement = CurDAG->getNOT(dl, Complement, VT);
        CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
    if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
                                    N->getOperand(0), N->getOperand(1));
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

      auto mayPreventLoadFold = [&]() {
               N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
               !N->getOperand(1).hasOneUse();
        N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
      MVT VT = N->getSimpleValueType(0);
          CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
      CurDAG->ReplaceAllUsesWith(N, Res.getNode());

    switch (N->getOpcode()) {
      MVT VT = N->getSimpleValueType(0);
      if (!Subtarget->hasBWI() && needBWI(VT)) {
                                NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
                          CurDAG->getIntPtrConstant(Index, dl));
        CurDAG->ReplaceAllUsesWith(N, Res.getNode());

      MVT VT = N->getSimpleValueType(0);
      if (!Subtarget->hasBWI() && needBWI(VT)) {
        auto *MemNode = cast<MemSDNode>(N);
        SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
        SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
        SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
            MemNode->getMemOperand());
                                NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
                          CurDAG->getIntPtrConstant(Index, dl));
        CurDAG->ReplaceAllUsesWith(N, To);

      auto *Ld = cast<LoadSDNode>(N);
      MVT VT = N->getSimpleValueType(0);
      SDValue Chain = Ld->getChain();
        auto *UserLd = dyn_cast<LoadSDNode>(User);
        MVT UserVT = User->getSimpleValueType(0);
            UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
            !User->hasAnyUseOfValue(1) &&
                                        CurDAG->getIntPtrConstant(0, dl));
          SDValue Res = CurDAG->getBitcast(VT, Extract);
          CurDAG->ReplaceAllUsesWith(N, To);

      EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
      if (EleVT == MVT::i1)
        break;

      assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
      assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
             "We can't replace VSELECT with BLENDV in vXi16!");

      if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
                     N->getOperand(0), N->getOperand(1), N->getOperand(2),
                     CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
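      // The 0xCA immediate makes VPTERNLOG act as a bitwise select: with the
      // canonical per-operand truth tables A=0xF0, B=0xCC, C=0xAA, the blend
      // (A & B) | (~A & C) = (0xF0 & 0xCC) | (0x0F & 0xAA) = 0xC0 | 0x0A
      // = 0xCA, i.e. "take B where the mask A is set, else take C".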
                     N->getOperand(0), N->getOperand(1),
      CurDAG->ReplaceAllUsesWith(N, R.getNode());

      if (!N->getSimpleValueType(0).isVector())
        break;

      switch (N->getOpcode()) {
      if (N->isStrictFPOpcode())
            CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
                            {N->getOperand(0), N->getOperand(1)});
            CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
      CurDAG->ReplaceAllUsesWith(N, Res.getNode());

      if (!N->getValueType(0).isVector())
        break;

      switch (N->getOpcode()) {
      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0), N->getOperand(1));
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

      if (!N->getValueType(0).isVector())
        break;

      if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
               "Unexpected opcode for mask vector!");
      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

    switch (N->getOpcode()) {
      bool IsStrict = N->isStrictFPOpcode();
                              {N->getValueType(0), MVT::Other},
                              {N->getOperand(0), N->getOperand(1),
                               CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
                              CurDAG->getTargetConstant(Imm, dl, MVT::i32));
      CurDAG->ReplaceAllUsesWith(N, Res.getNode());

      MVT VT = N->getSimpleValueType(0);
      if (VT.isVector() || VT == MVT::f128)
        break;

      MVT VecVT = VT == MVT::f64   ? MVT::v2f64
                  : VT == MVT::f32 ? MVT::v4f32
      if (Subtarget->hasSSE2()) {
        switch (N->getOpcode()) {
        Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
        Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
                            CurDAG->getIntPtrConstant(0, dl));
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  if (OptLevel != CodeGenOptLevel::None &&
      !Subtarget->useIndirectThunkCalls() &&
      ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
        (Subtarget->is64Bit() ||
         !getTargetMachine().isPositionIndependent())))) {

    switch (N->getOpcode()) {
      MVT SrcVT = N->getOperand(0).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      if (SrcIsSSE && DstIsSSE)
      if (!SrcIsSSE && !DstIsSSE) {
      if (N->getConstantOperandVal(1))

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
          CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
                                          MemTmp, MPI, MemVT);
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

      MVT SrcVT = N->getOperand(1).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      if (SrcIsSSE && DstIsSSE)
      if (!SrcIsSSE && !DstIsSSE) {
      if (N->getConstantOperandVal(2))

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();

        SDVTList VTs = CurDAG->getVTList(MVT::Other);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
        if (N->getFlags().hasNoFPExcept()) {
          Flags.setNoFPExcept(true);
          Store->setFlags(Flags);
        assert(SrcVT == MemVT && "Unexpected VT!");
        Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,

        SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
        Result = CurDAG->getMemIntrinsicNode(
        if (N->getFlags().hasNoFPExcept()) {
          Flags.setNoFPExcept(true);
        assert(DstVT == MemVT && "Unexpected VT!");
        Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);

      CurDAG->ReplaceAllUsesWith(N, Result.getNode());

    CurDAG->RemoveDeadNodes();
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
      Opc != X86::MOVSX64rr8)

  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
                                                : X86::MOVSX32rr8_NOREX;
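  // Background note: the _NOREX variants are emitted when copying out of AH
  // (e.g. the remainder produced by an 8-bit divide), because AH cannot be
  // encoded in an instruction that carries a REX prefix. The pattern matched
  // here collapses a redundant extend stacked on top of such a copy.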
  if (Opc == X86::MOVSX64rr8) {
  ReplaceUses(N, Extend);

void X86DAGToDAGISel::PostprocessISelDAG() {
  if (TM.getOptLevel() == CodeGenOptLevel::None)

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    if (N->use_empty() || !N->isMachineOpcode())
    if (tryOptimizeRem8Extend(N)) {

    unsigned Opc = N->getMachineOpcode();
    case X86::CTEST16rr:
    case X86::CTEST32rr:
    case X86::CTEST64rr: {
      auto &Op0 = N->getOperand(0);
#define CASE_ND(OP)                                                            \
  case X86::OP:                                                                \
  case X86::OP##_ND:
      switch (And.getMachineOpcode()) {
        if (And->hasAnyUseOfValue(1))
        Ops[0] = And.getOperand(0);
        Ops[1] = And.getOperand(1);
            CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
        ReplaceUses(N, Test);
        if (And->hasAnyUseOfValue(1))
        bool IsCTESTCC = X86::isCTESTCC(Opc);
#define FROM_TO(A, B)                                                          \
  CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B;                          \
  break;
        switch (And.getMachineOpcode()) {
                         And.getOperand(3), And.getOperand(4),
                         And.getOperand(5), And.getOperand(0)};
            NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
        CurDAG->setNodeMemRefs(
            Test, cast<MachineSDNode>(And.getNode())->memoperands());

    case X86::KORTESTBkk:
    case X86::KORTESTWkk:
    case X86::KORTESTDkk:
    case X86::KORTESTQkk: {
      if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
#define FROM_TO(A, B) \
      if (NewOpc == X86::KTESTWkk && !Subtarget->hasDQI())
      ReplaceUses(N, KTest);

    case TargetOpcode::SUBREG_TO_REG: {
      unsigned SubRegIdx = N->getConstantOperandVal(2);
      if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)

      CASE(VMOVAPDZ128rr)   CASE(VMOVUPDZ128rr)
      CASE(VMOVAPSZ128rr)   CASE(VMOVUPSZ128rr)
      CASE(VMOVDQA32Z128rr) CASE(VMOVDQU32Z128rr)
      CASE(VMOVDQA64Z128rr) CASE(VMOVDQU64Z128rr)
      CASE(VMOVAPDZ256rr)   CASE(VMOVUPDZ256rr)
      CASE(VMOVAPSZ256rr)   CASE(VMOVUPSZ256rr)
      CASE(VMOVDQA32Z256rr) CASE(VMOVDQU32Z256rr)
      CASE(VMOVDQA64Z256rr) CASE(VMOVDQU64Z256rr)

      if (!In.isMachineOpcode() ||
          In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
      uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
      CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));

  CurDAG->RemoveDeadNodes();
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    auto &DL = CurDAG->getDataLayout();
    CLI.setChain(CurDAG->getRoot())
        CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
    CurDAG->setRoot(Result.second);

void X86DAGToDAGISel::emitFunctionEntryCode() {
  if (F.hasExternalLinkage() && F.getName() == "main")
    emitSpecialCodeForMain();

  return isInt<31>(Val);

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  int64_t Val = AM.Disp + Offset;

  if (Val != 0 && (AM.ES || AM.MCSym))
  if (Subtarget->is64Bit()) {
                                 AM.hasSymbolicDisplacement()))
  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
  if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
      !AM.hasBaseOrIndexReg())
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                                         bool AllowSegmentRegForX32) {
  if (isNullConstant(Address) && AM.Segment.getNode() == nullptr &&
      !IndirectTlsSegRefs &&
      (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
       Subtarget->isTargetFuchsia())) {
    if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
    switch (N->getPointerInfo().getAddrSpace()) {
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);

bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  if (AM.hasSymbolicDisplacement())

  bool IsRIPRelTLS = false;
  if (IsRIPRel && AM.hasBaseOrIndexReg())

  X86ISelAddressMode Backup = AM;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N0)) {
    AM.GV = G->getGlobal();
    AM.SymbolFlags = G->getTargetFlags();
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
    AM.CP = CP->getConstVal();
    AM.Alignment = CP->getAlign();
    AM.SymbolFlags = CP->getTargetFlags();
  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
    AM.ES = S->getSymbol();
    AM.SymbolFlags = S->getTargetFlags();
  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
    AM.MCSym = S->getMCSymbol();
  } else if (auto *J = dyn_cast<JumpTableSDNode>(N0)) {
    AM.JT = J->getIndex();
    AM.SymbolFlags = J->getTargetFlags();
  } else if (auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
    AM.BlockAddr = BA->getBlockAddress();
    AM.SymbolFlags = BA->getTargetFlags();
    Offset = BA->getOffset();

  if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
      TM.isLargeGlobalValue(AM.GV)) {
  if (foldOffsetIntoAddress(Offset, AM)) {
    AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))

  if (Subtarget->isTarget64BitILP32() &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
    SDValue Save_Base_Reg = AM.Base_Reg;
    if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
      if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
        AM.Base_Reg = Save_Base_Reg;

  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;

      (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
      AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr &&
    if (isa_and_nonnull<Function>(AM.GV) && AM.Disp < -16 * 1024 * 1024)
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth + 1))

  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
                               Depth + 1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))

  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);

  N = Handle.getValue();

  if (N->getNodeId() == -1 ||
                                      X86ISelAddressMode &AM) {
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))

  MVT XVT = X.getSimpleValueType();
  MVT VT = N.getSimpleValueType();
  AM.Scale = (1 << ScaleLog);

                                        X86ISelAddressMode &AM) {
  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();

  bool FoundAnyExtend = false;
    FoundAnyExtend = true;

  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)

  MVT VT = N.getSimpleValueType();
  if (FoundAnyExtend) {
  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;

                                     X86ISelAddressMode &AM) {
  unsigned MaskIdx, MaskLen;
  unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
  unsigned AMShiftAmt = MaskIdx;
  if (AMShiftAmt == 0 || AMShiftAmt > 3)
    return true;

  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  if (MaskLZ < ScaleDown)
  MaskLZ -= ScaleDown;

  bool ReplacingAnyExtend = false;
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;

  APInt MaskedHighBits =
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
  MVT XVT = X.getSimpleValueType();
  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewExt;

                                  X86ISelAddressMode &AM,
  if (!Subtarget.hasTBM() &&
      !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))

  unsigned MaskIdx, MaskLen;
  unsigned AMShiftAmt = MaskIdx;
  if (AMShiftAmt == 0 || AMShiftAmt > 3)
    return true;

  MVT XVT = X.getSimpleValueType();
  MVT VT = N.getSimpleValueType();
  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewExt;
                                   X86ISelAddressMode &AM,
  assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
  assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
         "Illegal index scale");

  EVT VT = N.getValueType();
  unsigned Opc = N.getOpcode();

  if (CurDAG->isBaseWithConstantOffset(N)) {
    auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
    if (!foldOffsetIntoAddress(Offset, AM))
      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);

  if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
    if (AM.Scale <= 4) {
      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);

    uint64_t ShiftAmt = N.getConstantOperandVal(1);
    uint64_t ScaleAmt = 1ULL << ShiftAmt;
    if ((AM.Scale * ScaleAmt) <= 8) {
      AM.Scale *= ScaleAmt;
      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);

    if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
      if (CurDAG->isBaseWithConstantOffset(Src)) {
        SDValue AddSrc = Src.getOperand(0);
        auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
        int64_t Offset = AddVal->getSExtValue();
          SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
          CurDAG->ReplaceAllUsesWith(N, ExtAdd);
          CurDAG->RemoveDeadNode(N.getNode());

    unsigned SrcOpc = Src.getOpcode();
    if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
         CurDAG->isADDLike(Src, /*NoWrap=*/true)) &&
      if (CurDAG->isBaseWithConstantOffset(Src)) {
        SDValue AddSrc = Src.getOperand(0);
        if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
          if ((AM.Scale * ScaleAmt) <= 8 &&
               CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
            AM.Scale *= ScaleAmt;
            SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
          SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
          SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
          CurDAG->ReplaceAllUsesWith(N, ExtAdd);
          CurDAG->RemoveDeadNode(N.getNode());
          return Res ? Res : ExtSrc;
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
    dbgs() << "MatchAddress: ";
    return matchAddressBase(N, AM);

  if (AM.isRIPRelative()) {
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
    if (auto *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))

  switch (N.getOpcode()) {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        AM.MCSym = ESNode->getMCSymbol();

    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))

    if (!matchWrapper(N, AM))

    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))

    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)

    if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))

    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    if (N.getResNo() != 0)
      break;

    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue()) - 1;

          auto *AddVal = cast<ConstantSDNode>(MulVal.getOperand(1));
          uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
          if (foldOffsetIntoAddress(Disp, AM))
            Reg = N.getOperand(0);
          Reg = N.getOperand(0);

          AM.IndexReg = AM.Base_Reg = Reg;
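          // Example of the mul-by-{3,5,9} match: `x * 9` becomes
          // lea (%rax,%rax,8), i.e. Base_Reg = x, IndexReg = x, and
          // Scale = CN->getZExtValue() - 1 = 8, with any constant term
          // folded into Disp above.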
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth + 1)) {
      N = Handle.getValue();
    N = Handle.getValue();

    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
         RHS.getOperand(0).getValueType() == MVT::i32))

    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)

    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
            ((AM.Disp != 0) && (Backup.Disp == 0)) +
            (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)

    AM.NegateIndex = true;

    if (!CurDAG->isADDLike(N))

    if (!matchAdd(N, AM, Depth))

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)))

    if (N.getOperand(0).getOpcode() == ISD::SRL) {

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)

    if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
      AM.IndexReg = Index;

    if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
      if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
        Mask = MaskC->getAPIntValue();
        Src = Src.getOperand(0);

    if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
      SDValue ShlSrc = Src.getOperand(0);
      SDValue ShlAmt = Src.getOperand(1);
      auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
      unsigned ShAmtV = ShAmtC->getZExtValue();

      if (!Src->getFlags().hasNoUnsignedWrap() &&
          !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))

      MVT VT = N.getSimpleValueType();
      if (!Mask.isAllOnes()) {
        Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
        Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);

      CurDAG->ReplaceAllUsesWith(N, NewShl);
      CurDAG->RemoveDeadNode(N.getNode());

      AM.Scale = 1 << ShAmtV;
      AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1);
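      // Example of this zext-of-shl match: (i64 zext (i32 shl x, 2)) is
      // rewritten so the zero-extended value becomes the index with
      // Scale = 4. This is only safe when the shift cannot overflow the
      // 32-bit result (NUW, or the shifted-out high bits are known zero),
      // because the [base + index*scale] computation is done in 64 bits.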
    if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
                                       Src.getOperand(0), AM))
                                     Src.getOperand(0), AM))
                                   Src.getOperand(0), AM, *Subtarget))

  return matchAddressBase(N, AM);

bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    if (!AM.IndexReg.getNode()) {

  AM.BaseType = X86ISelAddressMode::RegBase;

bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
                                                    X86ISelAddressMode &AM,
                                                    unsigned Depth) {
    dbgs() << "MatchVectorAddress: ";
    return matchAddressBase(N, AM);

  switch (N.getOpcode()) {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
    if (!matchWrapper(N, AM))

    X86ISelAddressMode Backup = AM;
    if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
        !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
    if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
        !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
    N = Handle.getValue();

  return matchAddressBase(N, AM);

bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  return matchVectorAddressRecursively(N, AM, 0);
  X86ISelAddressMode AM;
    AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
    AM.IndexReg = IndexOp;

    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  if (matchVectorAddress(BasePtr, AM))

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);

  X86ISelAddressMode AM;
  unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  MVT VT = N.getSimpleValueType();
  if (matchAddress(N, AM))

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);

    N = N.getOperand(0);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
    return CR->getUnsignedMax().ult(1ull << 32);
  return !TM.isLargeGlobalValue(GV);

  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))

  auto *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)
    Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
    Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
                                         Base);

  RN = dyn_cast<RegisterSDNode>(Index);
  if (RN && RN->getReg() == 0)
    Index = CurDAG->getRegister(0, MVT::i64);
         "Expect to be extending 32-bit registers for use in LEA");
    Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
                                          Index);
bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  MVT VT = N.getSimpleValueType();

  SDValue T = CurDAG->getRegister(0, MVT::i32);
  if (matchAddress(N, AM))

  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)

  if (AM.IndexReg.getNode())

  if (AM.hasSymbolicDisplacement()) {
    if (Subtarget->is64Bit())

  auto isMathWithFlags = [](SDValue V) {
    switch (V.getOpcode()) {
  if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))

  if (Complexity <= 2)

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);

  X86ISelAddressMode AM;
  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
    AM.GV = GA->getGlobal();
    AM.Disp += GA->getOffset();
    AM.SymbolFlags = GA->getTargetFlags();
  } else {
    auto *SA = cast<ExternalSymbolSDNode>(N);
    AM.ES = SA->getSymbol();
    AM.SymbolFlags = SA->getTargetFlags();
  }

  if (Subtarget->is32Bit()) {
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);

  MVT VT = N.getSimpleValueType();
  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
  EVT VT = N.getValueType();
  bool WasTruncated = false;
    WasTruncated = true;
    N = N.getOperand(0);

  unsigned Opc = N.getOperand(0)->getOpcode();
    Op = N.getOperand(0);
    return !WasTruncated;

  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
  std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))

  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
                                      GA->getOffset(), GA->getTargetFlags());

  assert(Root && P && "Unknown root/parent nodes");
      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);

  assert(Root && P && "Unknown root/parent nodes");
      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();

bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
    N = N->getOperand(0).getNode();

  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));

  auto *GV = GA->getGlobal();
    return CR->getSignedMin().sge(-1ull << Width) &&
           CR->getSignedMax().slt(1ull << Width);

  return Width == 32 && !TM.isLargeGlobalValue(GV);

  assert(N->isMachineOpcode() && "Unexpected node");
  unsigned Opc = N->getMachineOpcode();
  const MCInstrDesc &MCID = getInstrInfo()->get(Opc);

  return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
    if (Use.getResNo() != Flags.getResNo())
        cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
      if (FlagUse.getResNo() != 1)
      if (!FlagUse.getUser()->isMachineOpcode())

bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
    if (Use.getResNo() != Flags.getResNo())
        cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
      if (FlagUse.getResNo() != 1)
      if (!FlagUse.getUser()->isMachineOpcode())

bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
    if (Use.getResNo() != Flags.getResNo())

    unsigned UserOpc = User->getOpcode();
      if (cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
      if (FlagUse.getResNo() != 1)
      if (!FlagUse.getUser()->isMachineOpcode())
  if (StoredVal.getResNo() != 0)
    return false;

  LoadNode = cast<LoadSDNode>(Load);

  if (!Load.hasOneUse())

  bool FoundLoad = false;
  const unsigned int Max = 1024;
  if (Chain == Load.getValue(1)) {
      if (Op == Load.getValue(1)) {
      if (Op.getNode() != LoadNode)
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  auto *StoreNode = cast<StoreSDNode>(Node);

  EVT MemVT = StoreNode->getMemoryVT();
  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&

  bool IsCommutable = false;
  bool IsNegate = false;
    IsCommutable = true;

  unsigned LoadOpNo = IsNegate ? 1 : 0;
                            LoadNode, InputChain)) {
                              LoadNode, InputChain))

  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
                  Segment))

  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
    unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
    if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
      if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
            ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
            : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
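        // INC/DEC are substituted for add/sub of +/-1 only when CF is
        // provably dead: unlike ADD/SUB, INC and DEC leave the carry flag
        // untouched, so the swap is unsound if any user reads CF (hence the
        // hasNoCarryFlagUses guard). They also drop the immediate byte,
        // which is what the size heuristic above is after.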
    auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
        return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
        return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
        return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
        return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
        return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
        return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
        return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,

    auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
        return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
        return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
        return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
        return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
        return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
        return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
        return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,

    unsigned NewOpc = SelectRegOpcode(Opc);
    if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
      int64_t OperandV = OperandC->getSExtValue();
          ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
           (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
            isInt<32>(-OperandV))) &&
          hasNoCarryFlagUses(StoredVal.getValue(1))) {
        OperandV = -OperandV;
      if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
        Operand = CurDAG->getSignedTargetConstant(OperandV, SDLoc(Node), MemVT);
        NewOpc = SelectImmOpcode(Opc);
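      // Immediate-negation example: `addl $128, (mem)` needs a 4-byte
      // immediate (128 does not fit in int8), while `subl $-128, (mem)`
      // encodes -128 in a single byte. The swap flips ADD<->SUB (and
      // ADC<->SBB), which is only legal when CF is unused, hence the
      // hasNoCarryFlagUses guard above.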
          CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
                       Segment, Operand, CopyTo, CopyTo.getValue(1)};
      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                       Segment, Operand, InputChain};
      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,

  CurDAG->setNodeMemRefs(Result, MemOps);

  CurDAG->RemoveDeadNode(Node);
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
      "Should be either an and-mask, or right-shift after clearing high bits.");

  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
    return false;

  MVT NVT = Node->getSimpleValueType(0);
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return false;

  const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
  auto checkUses = [AllowExtraUsesByDefault](
                       SDValue Op, unsigned NUses,
                       std::optional<bool> AllowExtraUses) {
    return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
           Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
  };
  auto checkOneUse = [checkUses](SDValue Op,
                                 std::optional<bool> AllowExtraUses =
                                     std::nullopt) {
    return checkUses(Op, 1, AllowExtraUses);
  };
  auto checkTwoUse = [checkUses](SDValue Op,
                                 std::optional<bool> AllowExtraUses =
                                     std::nullopt) {
    return checkUses(Op, 2, AllowExtraUses);
  };

  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
      assert(V.getSimpleValueType() == MVT::i32 &&
             V.getOperand(0).getSimpleValueType() == MVT::i64 &&
             "Expected i64 -> i32 truncation");
      V = V.getOperand(0);

  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
                        &NegateNBits](SDValue Mask) -> bool {
    if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    NBits = M0->getOperand(1);
    NegateNBits = false;

  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
    V = peekThroughOneUseTruncation(V);
    return CurDAG->MaskedValueIsAllOnes(

  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
    if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
    if (!isAllOnes(Mask->getOperand(1)))
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    if (!isAllOnes(M0->getOperand(0)))
    NBits = M0->getOperand(1);
    NegateNBits = false;

  auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
                                                     unsigned Bitwidth) {
      NBits = NBits.getOperand(0);
      auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
      if (!V0 || V0->getZExtValue() != Bitwidth)
      NBits = NBits.getOperand(1);
      NegateNBits = false;

  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
    Mask = peekThroughOneUseTruncation(Mask);
    unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
    if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
    if (!checkOneUse(M1))
    canonicalizeShiftAmt(M1, Bitwidth);
    return !NegateNBits;

  auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
                        AllowExtraUsesByDefault, &NegateNBits,
    canonicalizeShiftAmt(N1, Bitwidth);
    const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
    if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))

  auto matchLowBitMask = [matchPatternA, matchPatternB,
    return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);

    X = Node->getOperand(0);
    if (matchLowBitMask(Mask)) {
      if (!matchLowBitMask(Mask))

      X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
  } else if (!matchPatternD(Node))

  if (NegateNBits && !Subtarget->hasBMI2())
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
  SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
  NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                         MVT::i32, ImplDef, NBits, SRIdxVal),
    NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);

  if (Subtarget->hasBMI2()) {
    if (NVT != MVT::i32) {

    SelectCode(Extract.getNode());

  SDValue RealX = peekThroughOneUseTruncation(X);

  MVT XVT = X.getSimpleValueType();

  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
    SDValue ShiftAmt = X.getOperand(1);
    X = X.getOperand(0);
           "Expected shift amount to be i8");

    SDValue OrigShiftAmt = ShiftAmt;
    Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);

  if (XVT != MVT::i32) {

  SelectCode(Extract.getNode());

  MVT NVT = Node->getSimpleValueType(0);
      Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  if (!PreferBEXTR && !Subtarget->hasBMI2())

  if (NVT != MVT::i32 && NVT != MVT::i64)

  auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
  auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!MaskCst || !ShiftCst)

  uint64_t Shift = ShiftCst->getZExtValue();
  if (Shift == 8 && MaskSize == 8)
  if (!PreferBEXTR && MaskSize <= 32)

  unsigned ROpc, MOpc;
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
    assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
    Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
    unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
    Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
    Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
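    // Control-operand encodings, for reference: BZHI keeps bits [n-1:0] for
    // an index n in bits [7:0] of the control, so zeroing above
    // Shift + MaskSize and then shifting right by Shift extracts the field.
    // BEXTR packs start in bits [7:0] and length in bits [15:8]; e.g.
    // extracting 8 bits starting at bit 4 uses 4 | (8 << 8) = 0x0804.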
    if (Subtarget->hasTBM()) {
      ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
      MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
      assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
      unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
      Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
                     Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
    NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);

    SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
        CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node) {
  auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = {N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
    SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});

  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);

MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node,
                                             SDValue &InGlue) {
  auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = {N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
    SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});

  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned Size = VT == MVT::i64 ? 64 : 32;

  SDValue OrigShiftAmt = N->getOperand(1);
  SDValue ShiftAmt = OrigShiftAmt;

  auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
  auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
  if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
      ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
       (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
    assert(Add0C == nullptr || Add1C == nullptr);
    NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
                                  Add0C == nullptr ? Add0 : Add1, AllOnes);
             Add0C->getZExtValue() != 0) {
    if (Add0C->getZExtValue() % Size == 0)
             Add0C->getZExtValue() % 32 == 0) {
      Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
      X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);

    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
    NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
                                  CurDAG->getConstant(Size - 1, DL, MVT::i8));
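    // This rewrite relies on x86 shift semantics: hardware masks the amount
    // to Size-1 (mod 32 or mod 64), so e.g. `shl x, (32 - y)` and
    // `shl x, (-y & 31)` agree for 32-bit operations, and an explicit AND of
    // the amount with Size-1 is a no-op that can be added or dropped freely.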
  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
  if (UpdatedNode != N) {
    ReplaceNode(N, UpdatedNode);

  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
  MVT NVT = N->getSimpleValueType(0);
  unsigned Opcode = N->getOpcode();

  auto *Cst = dyn_cast<ConstantSDNode>(N1);
  int64_t Val = Cst->getSExtValue();

  bool FoundAnyExtend = false;
    FoundAnyExtend = true;

  if (NVT != MVT::i32 && NVT != MVT::i64)

  auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));

  uint64_t ShAmt = ShlCst->getZExtValue();
  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)

  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
      if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
      ShiftedVal = Val >> ShAmt;
      if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
          (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))

  if (!CanShrinkImmediate(ShiftedVal))

  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
  NeededMask &= ~Cst->getAPIntValue();
  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))

  if (FoundAnyExtend) {
  SDValue NewCst = CurDAG->getSignedConstant(ShiftedVal, dl, NVT);
  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
                                     SDNode *ParentB, SDNode *ParentC,
                                     SDValue A, SDValue B, SDValue C,
                                     uint8_t Imm) {
  assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
         C.isOperandOf(ParentC) && "Incorrect parent node");

  auto tryFoldLoadOrBCast =
    if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
      L = L.getOperand(0);
    auto *MemIntr = cast<MemIntrinsicSDNode>(L);
    unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
    return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);

  bool FoldedLoad = false;
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
    Imm = OldImm & 0xa5;
    if (OldImm & 0x02)
      Imm |= 0x10;
    if (OldImm & 0x10)
      Imm |= 0x02;
    if (OldImm & 0x08)
      Imm |= 0x40;
    if (OldImm & 0x40)
      Imm |= 0x08;
  } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
    Imm = OldImm & 0x99;
    if (OldImm & 0x02)
      Imm |= 0x04;
    if (OldImm & 0x04)
      Imm |= 0x02;
    if (OldImm & 0x20)
      Imm |= 0x40;
    if (OldImm & 0x40)
      Imm |= 0x20;
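  // The immediate fixups above account for operand reordering: imm bit i
  // encodes the result for inputs (A,B,C) = (i>>2 & 1, i>>1 & 1, i & 1).
  // Swapping A and C keeps the table entries where A == C, i.e. indices
  // {0,2,5,7} (mask 0xA5), and exchanges 1<->4 (0x02/0x10) and 3<->6
  // (0x08/0x40). Swapping B and C keeps indices {0,3,4,7} (mask 0x99) and
  // exchanges 1<->2 (0x02/0x04) and 5<->6 (0x20/0x40).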
  SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);

    SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);

      auto *MemIntr = cast<MemIntrinsicSDNode>(C);
      unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
      assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");

      bool UseD = EltSize == 32;
        Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
        Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
        Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
        Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
        Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
        Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;

    SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
    MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);

    ReplaceUses(C.getValue(1), SDValue(MNode, 1));

    CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});

      Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
      Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
      Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;

    MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});

  CurDAG->RemoveDeadNode(Root);
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
  MVT NVT = N->getSimpleValueType(0);
  if (!NVT.isVector() || !Subtarget->hasAVX512() ||

  auto getFoldableLogicOp = [](SDValue Op) {
      Op = Op.getOperand(0);

    if (!Op.hasOneUse())

    unsigned Opc = Op.getOpcode();

  if ((FoldableOp = getFoldableLogicOp(N1))) {
  } else if ((FoldableOp = getFoldableLogicOp(N0))) {

    Parent = Op.getNode();
    Op = Op.getOperand(0);

  PeekThroughNot(A, ParentA, TernlogMagicA);
  PeekThroughNot(B, ParentB, TernlogMagicB);
  PeekThroughNot(C, ParentC, TernlogMagicC);

  case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
  case ISD::OR:  Imm = TernlogMagicB | TernlogMagicC; break;
  case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
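  // The TernlogMagic values are the truth tables of the three identity
  // functions (A = 0xF0, B = 0xCC, C = 0xAA), so composing them with &, |
  // and ^ as above yields the immediate for the whole expression; e.g.
  // B | C gives 0xCC | 0xAA = 0xEE, the VPTERNLOG immediate for a plain OR.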
  switch (N->getOpcode()) {
    Imm &= ~TernlogMagicA;
    Imm = ~(Imm) & TernlogMagicA;

  return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);

bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
  MVT VT = And->getSimpleValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)

  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));

  APInt MaskVal = And1C->getAPIntValue();
  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))

  if (VT == MVT::i64 && MaskLZ >= 32) {
    MaskVal = MaskVal.trunc(32);

  APInt NegMaskVal = MaskVal | HighZeros;

  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
    NegMaskVal = NegMaskVal.zext(64);
    HighZeros = HighZeros.zext(64);

  KnownBits Known0 = CurDAG->computeKnownBits(And0);
                              bool FoldedBCast, bool Masked) {
#define VPTESTM_CASE(VT, SUFFIX) \
case MVT::VT: \
  if (Masked) \
    return IsTestN ? X86::VPTESTNM##SUFFIX##k : X86::VPTESTM##SUFFIX##k; \
  return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;

#define VPTESTM_BROADCAST_CASES(SUFFIX) \
default: llvm_unreachable("Unexpected VT!"); \
VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
VPTESTM_CASE(v16i32, DZ##SUFFIX) \
VPTESTM_CASE(v8i64, QZ##SUFFIX)

#define VPTESTM_FULL_CASES(SUFFIX) \
VPTESTM_BROADCAST_CASES(SUFFIX) \