83#include "llvm/IR/IntrinsicsARM.h"
118#define DEBUG_TYPE "arm-isel"
121STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
122STATISTIC(NumMovwMovt,
"Number of GAs materialized with movw + movt");
123STATISTIC(NumLoopByVals,
"Number of loops generated for byval arguments");
125 "Number of constants with their storage promoted into constant pools");
129 cl::desc(
"Enable / disable ARM interworking (for debugging only)"),
134 cl::desc(
"Enable / disable promotion of unnamed_addr constants into "
139 cl::desc(
"Maximum size of constant to promote into a constant pool"),
143 cl::desc(
"Maximum size of ALL constants to promote into a constant pool"),
148 cl::desc(
"Maximum interleave factor for MVE VLDn to generate."),
153 cl::desc(
"Maximum number of base-updates to check generating postindex."),
161 ARM::R0, ARM::R1, ARM::R2, ARM::R3
175void ARMTargetLowering::addTypeForNEON(
MVT VT,
MVT PromotedLdStVT) {
176 if (VT != PromotedLdStVT) {
185 if (ElemTy != MVT::f64)
189 if (ElemTy == MVT::i32) {
233void ARMTargetLowering::addDRTypeForNEON(
MVT VT) {
235 addTypeForNEON(VT, MVT::f64);
238void ARMTargetLowering::addQRTypeForNEON(
MVT VT) {
240 addTypeForNEON(VT, MVT::v2f64);
243void ARMTargetLowering::setAllExpand(
MVT VT) {
256void ARMTargetLowering::addAllExtLoads(
const MVT From,
const MVT To,
263void ARMTargetLowering::addMVEVectorTypes(
bool HasMVEFP) {
264 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
266 for (
auto VT : IntTypes) {
341 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
342 for (
auto VT : FloatTypes) {
416 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
417 for (
auto VT : LongTypes) {
434 addAllExtLoads(MVT::v8i16, MVT::v8i8,
Legal);
435 addAllExtLoads(MVT::v4i32, MVT::v4i16,
Legal);
436 addAllExtLoads(MVT::v4i32, MVT::v4i8,
Legal);
453 for (
auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
462 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
463 for (
auto VT : pTypes) {
514 RegInfo(Subtarget->getRegisterInfo()),
515 Itins(Subtarget->getInstrItineraryData()) {
521 const Triple &TT = TM.getTargetTriple();
523 if (Subtarget->isThumb1Only())
528 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
529 Subtarget->hasFPRegs()) {
533 if (!Subtarget->hasVFP2Base()) {
534 setAllExpand(MVT::f32);
543 if (!Subtarget->hasFP64()) {
544 setAllExpand(MVT::f64);
554 if (Subtarget->hasFullFP16()) {
569 if (Subtarget->hasBF16()) {
571 setAllExpand(MVT::bf16);
572 if (!Subtarget->hasFullFP16())
584 addAllExtLoads(VT, InnerVT,
Expand);
593 if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
596 if (!Subtarget->hasV8_1MMainlineOps())
599 if (!Subtarget->isThumb1Only())
608 if (Subtarget->hasMVEIntegerOps())
609 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
612 if (Subtarget->hasLOB()) {
616 if (Subtarget->hasNEON()) {
617 addDRTypeForNEON(MVT::v2f32);
618 addDRTypeForNEON(MVT::v8i8);
619 addDRTypeForNEON(MVT::v4i16);
620 addDRTypeForNEON(MVT::v2i32);
621 addDRTypeForNEON(MVT::v1i64);
623 addQRTypeForNEON(MVT::v4f32);
624 addQRTypeForNEON(MVT::v2f64);
625 addQRTypeForNEON(MVT::v16i8);
626 addQRTypeForNEON(MVT::v8i16);
627 addQRTypeForNEON(MVT::v4i32);
628 addQRTypeForNEON(MVT::v2i64);
630 if (Subtarget->hasFullFP16()) {
631 addQRTypeForNEON(MVT::v8f16);
632 addDRTypeForNEON(MVT::v4f16);
635 if (Subtarget->hasBF16()) {
636 addQRTypeForNEON(MVT::v8bf16);
637 addDRTypeForNEON(MVT::v4bf16);
641 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
681 if (Subtarget->hasNEON()) {
794 if (!Subtarget->hasVFP4Base()) {
803 for (
MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
812 for (
auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
821 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
829 if (Subtarget->hasMVEIntegerOps()) {
834 if (Subtarget->hasMVEFloatOps()) {
838 if (!Subtarget->hasFP64()) {
884 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
887 if (Subtarget->hasFullFP16()) {
895 if (!Subtarget->hasFP16()) {
924 if (!Subtarget->isThumb1Only()) {
943 if (TT.isTargetAEABI() && !Subtarget->allowsUnalignedMem()) {
955 if (!Subtarget->isThumb1Only()) {
964 if (Subtarget->hasDSP()) {
974 if (Subtarget->hasBaseDSP()) {
982 if (Subtarget->isThumb1Only()) {
986 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
987 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1002 if (Subtarget->hasMVEIntegerOps())
1006 if (Subtarget->isThumb1Only()) {
1012 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1026 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1035 if (Subtarget->hasPerfMon())
1039 if (!Subtarget->hasV6Ops())
1042 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1043 : Subtarget->hasDivideInARMMode();
1050 if (TT.isOSWindows() && !Subtarget->hasDivideInThumbMode()) {
1062 if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
1063 TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
1066 HasStandaloneRem =
false;
1093 if (TT.isOSWindows())
1100 InsertFencesForAtomic =
false;
1101 if (Subtarget->hasAnyDataBarrier() &&
1102 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1106 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1111 if (!Subtarget->hasAcquireRelease() ||
1114 InsertFencesForAtomic =
true;
1120 if (Subtarget->hasDataBarrier())
1121 InsertFencesForAtomic =
true;
1141 if (!InsertFencesForAtomic) {
1148 if (TT.isOSLinux() || (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1160 }
else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1161 Subtarget->hasForced32BitAtomics()) {
1175 if (!Subtarget->hasV6Ops()) {
1181 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1182 !Subtarget->isThumb1Only()) {
1211 if (Subtarget->hasFullFP16()) {
1221 if (Subtarget->hasFullFP16())
1236 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1237 !Subtarget->isThumb1Only()) {
1244 if (!Subtarget->hasVFP4Base()) {
1250 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1252 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1260 if (!Subtarget->hasFP16()) {
1277 if (Subtarget->hasFPARMv8Base()) {
1287 if (Subtarget->hasFP64())
1291 if (Subtarget->hasNEON()) {
1301 if (Subtarget->hasFullFP16()) {
1338 if (Subtarget->hasNEON()) {
1350 if (Subtarget->hasV8Ops()) {
1360 if (Subtarget->hasFullFP16()) {
1383 if (TT.isOSWindows()) {
1400 if (Subtarget->hasMVEIntegerOps())
1403 if (Subtarget->hasV6Ops())
1405 if (Subtarget->isThumb1Only())
1408 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1409 Subtarget->isThumb2()) {
1415 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1416 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1438 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1446 return Subtarget->useSoftFloat();
1450 return !Subtarget->isThumb1Only() && VT.
getSizeInBits() <= 32;
1463std::pair<const TargetRegisterClass *, uint8_t>
1474 case MVT::f32:
case MVT::f64:
case MVT::v8i8:
case MVT::v4i16:
1475 case MVT::v2i32:
case MVT::v1i64:
case MVT::v2f32:
1476 RRC = &ARM::DPRRegClass;
1481 if (Subtarget->useNEONForSinglePrecisionFP())
1484 case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
1485 case MVT::v4f32:
case MVT::v2f64:
1486 RRC = &ARM::DPRRegClass;
1490 RRC = &ARM::DPRRegClass;
1494 RRC = &ARM::DPRRegClass;
1498 return std::make_pair(RRC,
Cost);
1507 if (Subtarget->hasMVEIntegerOps())
1522 if (Subtarget->hasNEON()) {
1523 if (VT == MVT::v4i64)
1524 return &ARM::QQPRRegClass;
1525 if (VT == MVT::v8i64)
1526 return &ARM::QQQQPRRegClass;
1528 if (Subtarget->hasMVEIntegerOps()) {
1529 if (VT == MVT::v4i64)
1530 return &ARM::MQQPRRegClass;
1531 if (VT == MVT::v8i64)
1532 return &ARM::MQQQQPRRegClass;
1541 Align &PrefAlign)
const {
1548 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ?
Align(8) :
Align(4));
1560 unsigned NumVals =
N->getNumValues();
1564 for (
unsigned i = 0; i != NumVals; ++i) {
1565 EVT VT =
N->getValueType(i);
1566 if (VT == MVT::Glue || VT == MVT::Other)
1572 if (!
N->isMachineOpcode())
1580 if (
MCID.getNumDefs() == 0)
1582 if (!Itins->isEmpty() &&
1583 Itins->getOperandCycle(
MCID.getSchedClass(), 0) > 2U)
1597 return Const->getZExtValue() == 16;
1605 return Const->getZExtValue() == 16;
1613 return Const->getZExtValue() == 16;
1682 bool isVarArg)
const {
1701 if (!
getTM().isAAPCS_ABI())
1703 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
1711 if (!
getTM().isAAPCS_ABI()) {
1712 if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() && !isVarArg)
1715 }
else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
1724 bool isVarArg)
const {
1725 return CCAssignFnForNode(CC,
false, isVarArg);
1729 bool isVarArg)
const {
1730 return CCAssignFnForNode(CC,
true, isVarArg);
1737 bool isVarArg)
const {
1738 switch (getEffectiveCallingConv(CC, isVarArg)) {
1764 if (Subtarget->hasFullFP16()) {
1765 Val = DAG.
getNode(ARMISD::VMOVhr, dl, ValVT, Val);
1777 if (Subtarget->hasFullFP16()) {
1778 Val = DAG.
getNode(ARMISD::VMOVrh, dl,
1791SDValue ARMTargetLowering::LowerCallResult(
1795 SDValue ThisVal,
bool isCmseNSCall)
const {
1803 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
1804 CCValAssign VA = RVLocs[i];
1808 if (i == 0 && isThisReturn) {
1810 "unexpected return calling convention register assignment");
1828 if (!Subtarget->isLittle())
1830 Val = DAG.
getNode(ARMISD::VMOVDRR, dl, MVT::f64,
Lo,
Hi);
1845 if (!Subtarget->isLittle())
1847 Val = DAG.
getNode(ARMISD::VMOVDRR, dl, MVT::f64,
Lo,
Hi);
1877 const ISD::InputArg &Arg = Ins[VA.
getValNo()];
1888std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
1890 bool IsTailCall,
int SPDiff)
const {
1892 MachinePointerInfo DstInfo;
1912 return std::make_pair(DstAddr, DstInfo);
1921ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
1934 if (!SrcFrameIdxNode || !DstFrameIdxNode)
1937 int SrcFI = SrcFrameIdxNode->getIndex();
1938 int DstFI = DstFrameIdxNode->getIndex();
1940 "byval passed in non-fixed stack slot");
1962 if (SrcOffset == DstOffset)
1970 RegsToPassVector &RegsToPass,
1977 DAG.
getVTList(MVT::i32, MVT::i32), Arg);
1978 unsigned id = Subtarget->isLittle() ? 0 : 1;
1990 MachinePointerInfo DstInfo;
1991 std::tie(DstAddr, DstInfo) =
1992 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2009 SelectionDAG &DAG = CLI.
DAG;
2011 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.
Outs;
2012 SmallVectorImpl<SDValue> &OutVals = CLI.
OutVals;
2013 SmallVectorImpl<ISD::InputArg> &Ins = CLI.
Ins;
2020 const CallBase *CB = CLI.
CB;
2023 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
2025 MachineFunction::CallSiteInfo CSInfo;
2026 bool isStructRet = (Outs.
empty()) ?
false : Outs[0].Flags.isSRet();
2027 bool isThisReturn =
false;
2028 bool isCmseNSCall =
false;
2029 bool isSibCall =
false;
2030 bool PreferIndirect =
false;
2031 bool GuardWithBTI =
false;
2041 !Subtarget->noBTIAtReturnTwice())
2049 isCmseNSCall =
true;
2052 if (!Subtarget->supportsTailCall())
2068 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2069 count_if(GV->users(), [&BB](
const User *U) {
2070 return isa<Instruction>(U) &&
2071 cast<Instruction>(U)->getParent() == BB;
2078 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2092 "site marked musttail");
2095 unsigned NumBytes = CCInfo.getStackSize();
2104 if (isTailCall && !isSibCall) {
2105 auto FuncInfo = MF.
getInfo<ARMFunctionInfo>();
2106 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2111 assert(StackAlign &&
"data layout string is missing stack alignment");
2112 NumBytes =
alignTo(NumBytes, *StackAlign);
2117 SPDiff = NumReusableBytes - NumBytes;
2121 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (
unsigned)-SPDiff)
2137 RegsToPassVector RegsToPass;
2146 DenseMap<unsigned, SDValue> ByValTemporaries;
2150 for (
const CCValAssign &VA : ArgLocs) {
2152 SDValue Src = OutVals[ArgIdx];
2153 ISD::ArgFlagsTy
Flags = Outs[ArgIdx].Flags;
2155 if (!
Flags.isByVal())
2159 MachinePointerInfo DstInfo;
2160 std::tie(Dst, DstInfo) =
2161 computeAddrForCallArg(dl, DAG, VA,
SDValue(),
true, SPDiff);
2162 ByValCopyKind
Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2164 if (Copy == NoCopy) {
2169 }
else if (Copy == CopyOnce) {
2173 ByValTemporaries[ArgIdx] = Src;
2175 assert(Copy == CopyViaTemp &&
"unexpected enum value");
2179 int TempFrameIdx = MFI.CreateStackObject(
2180 Flags.getByValSize(),
Flags.getNonZeroByValAlign(),
false);
2188 SDVTList VTs = DAG.
getVTList(MVT::Other, MVT::Glue);
2189 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2191 DAG.
getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops));
2192 ByValTemporaries[ArgIdx] = Temp;
2195 if (!ByValCopyChains.
empty())
2205 bool AfterFormalArgLoads =
false;
2209 for (
unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2211 ++i, ++realArgIdx) {
2212 CCValAssign &VA = ArgLocs[i];
2213 SDValue Arg = OutVals[realArgIdx];
2214 ISD::ArgFlagsTy
Flags = Outs[realArgIdx].Flags;
2215 bool isByVal =
Flags.isByVal();
2235 if (isTailCall && VA.
isMemLoc() && !AfterFormalArgLoads) {
2237 if (ByValTempChain) {
2242 for (
unsigned I = 0;
I < OutVals.
size(); ++
I) {
2243 if (Outs[
I].
Flags.isByVal())
2251 FrameIndexSDNode *FIN =
2256 if (!MFI.isFixedObjectIndex(FIN->
getIndex()))
2259 for (
const CCValAssign &VA : ArgLocs) {
2267 if (!IncomingLoad.
empty()) {
2275 AfterFormalArgLoads =
true;
2287 auto ArgVT = Outs[realArgIdx].ArgVT;
2288 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2306 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2307 StackPtr, MemOpChains, isTailCall, SPDiff);
2311 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2312 StackPtr, MemOpChains, isTailCall, SPDiff);
2316 MachinePointerInfo DstInfo;
2317 std::tie(DstAddr, DstInfo) =
2318 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2322 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2323 StackPtr, MemOpChains, isTailCall, SPDiff);
2325 if (realArgIdx == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
2326 Outs[0].VT == MVT::i32) {
2328 "unexpected calling convention register assignment");
2330 "unexpected use of 'returned'");
2331 isThisReturn =
true;
2336 RegsToPass.push_back(std::make_pair(VA.
getLocReg(), Arg));
2337 }
else if (isByVal) {
2339 unsigned offset = 0;
2343 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2344 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2347 bool NeedsStackCopy;
2348 if (
auto It = ByValTemporaries.
find(realArgIdx);
2349 It != ByValTemporaries.
end()) {
2350 ByValSrc = It->second;
2351 NeedsStackCopy =
true;
2354 NeedsStackCopy = !isTailCall;
2358 if (CurByValIdx < ByValArgsCount) {
2359 unsigned RegBegin, RegEnd;
2360 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2364 for (i = 0, j = RegBegin;
j < RegEnd; i++,
j++) {
2368 DAG.
getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2371 RegsToPass.push_back(std::make_pair(j, Load));
2376 offset = RegEnd - RegBegin;
2378 CCInfo.nextInRegsParam();
2383 if (NeedsStackCopy &&
Flags.getByValSize() > 4 * offset) {
2386 MachinePointerInfo DstInfo;
2387 std::tie(Dst, DstInfo) =
2388 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2396 SDVTList VTs = DAG.
getVTList(MVT::Other, MVT::Glue);
2397 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2404 MachinePointerInfo DstInfo;
2405 std::tie(DstAddr, DstInfo) =
2406 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2413 if (!MemOpChains.
empty())
2419 for (
const auto &[
Reg,
N] : RegsToPass) {
2427 bool isDirect =
false;
2430 const Triple &
TT = TM.getTargetTriple();
2431 const GlobalValue *GVal =
nullptr;
2433 GVal =
G->getGlobal();
2434 bool isStub = !TM.shouldAssumeDSOLocal(GVal) &&
TT.isOSBinFormatMachO();
2436 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2437 bool isLocalARMFunc =
false;
2440 if (Subtarget->genLongCalls()) {
2442 "long-calls codegen is not position independent!");
2447 if (Subtarget->genExecuteOnly()) {
2448 if (Subtarget->useMovt())
2460 Addr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2466 const char *Sym = S->getSymbol();
2468 if (Subtarget->genExecuteOnly()) {
2469 if (Subtarget->useMovt())
2481 Addr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2488 if (!PreferIndirect) {
2493 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !
ARMInterworking);
2495 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2496 assert(
TT.isOSBinFormatMachO() &&
"WrapperPIC use on non-MachO?");
2498 ARMISD::WrapperPIC, dl, PtrVt,
2505 }
else if (Subtarget->isTargetCOFF()) {
2506 assert(Subtarget->isTargetWindows() &&
2507 "Windows is the only supported COFF target");
2511 else if (!TM.shouldAssumeDSOLocal(GVal))
2518 DAG.
getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2527 const char *Sym = S->getSymbol();
2528 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2530 ARMConstantPoolValue *CPV =
2532 ARMPCLabelIndex, 4);
2534 CPAddr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2539 Callee = DAG.
getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2546 assert(!isARMFunc && !isDirect &&
2547 "Cannot handle call to ARM function or direct call");
2551 "call to non-secure function would require "
2552 "passing arguments on stack",
2558 "call to non-secure function would return value through pointer",
2565 if (Subtarget->isThumb()) {
2567 CallOpc = ARMISD::t2CALL_BTI;
2568 else if (isCmseNSCall)
2569 CallOpc = ARMISD::tSECALL;
2570 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2571 CallOpc = ARMISD::CALL_NOLINK;
2573 CallOpc = ARMISD::CALL;
2575 if (!isDirect && !Subtarget->hasV5TOps())
2576 CallOpc = ARMISD::CALL_NOLINK;
2577 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2579 !Subtarget->hasMinSize())
2581 CallOpc = ARMISD::CALL_NOLINK;
2583 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2590 if (isTailCall && !isSibCall) {
2595 std::vector<SDValue>
Ops;
2596 Ops.push_back(Chain);
2597 Ops.push_back(Callee);
2605 for (
const auto &[
Reg,
N] : RegsToPass)
2609 const uint32_t *
Mask;
2610 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2618 isThisReturn =
false;
2624 assert(Mask &&
"Missing call preserved mask for calling convention");
2628 Ops.push_back(InGlue);
2641 Chain = DAG.
getNode(CallOpc, dl, {MVT::Other, MVT::Glue},
Ops);
2652 uint64_t CalleePopBytes =
2655 Chain = DAG.
getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2661 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2662 InVals, isThisReturn,
2663 isThisReturn ? OutVals[0] :
SDValue(), isCmseNSCall);
2670void ARMTargetLowering::HandleByVal(
CCState *State,
unsigned &
Size,
2671 Align Alignment)
const {
2673 Alignment = std::max(Alignment,
Align(4));
2679 unsigned AlignInRegs = Alignment.
value() / 4;
2680 unsigned Waste = (ARM::R4 -
Reg) % AlignInRegs;
2681 for (
unsigned i = 0; i < Waste; ++i)
2687 unsigned Excess = 4 * (ARM::R4 -
Reg);
2694 if (NSAAOffset != 0 &&
Size > Excess) {
2706 unsigned ByValRegBegin =
Reg;
2707 unsigned ByValRegEnd = std::min<unsigned>(
Reg +
Size / 4, ARM::R4);
2711 for (
unsigned i =
Reg + 1; i != ByValRegEnd; ++i)
2717 Size = std::max<int>(
Size - Excess, 0);
2725bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2731 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.
Outs;
2732 const SmallVectorImpl<SDValue> &OutVals = CLI.
OutVals;
2733 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.
Ins;
2734 const SelectionDAG &DAG = CLI.
DAG;
2739 assert(Subtarget->supportsTailCall());
2752 SmallSet<MCPhysReg, 5> AddressRegisters = {ARM::R0, ARM::R1, ARM::R2,
2754 if (!(Subtarget->isThumb1Only() ||
2755 MF.
getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
true)))
2756 AddressRegisters.
insert(ARM::R12);
2757 for (
const CCValAssign &AL : ArgLocs)
2759 AddressRegisters.
erase(
AL.getLocReg());
2760 if (AddressRegisters.
empty()) {
2761 LLVM_DEBUG(
dbgs() <<
"false (no reg to hold function pointer)\n");
2780 <<
" (guaranteed tail-call CC)\n");
2781 return CalleeCC == CallerCC;
2786 bool isCalleeStructRet = Outs.
empty() ?
false : Outs[0].Flags.isSRet();
2788 if (isCalleeStructRet != isCallerStructRet) {
2801 const GlobalValue *GV =
G->getGlobal();
2804 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
2805 TT.isOSBinFormatMachO())) {
2814 getEffectiveCallingConv(CalleeCC, isVarArg),
2815 getEffectiveCallingConv(CallerCC, CallerF.
isVarArg()), MF,
C, Ins,
2822 const ARMBaseRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2823 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
2824 if (CalleeCC != CallerCC) {
2825 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
2826 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
2835 const ARMFunctionInfo *AFI_Caller = MF.
getInfo<ARMFunctionInfo>();
2843 const MachineRegisterInfo &MRI = MF.
getRegInfo();
2845 LLVM_DEBUG(
dbgs() <<
"false (parameters in CSRs do not match)\n");
2864 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
2873 StringRef IntKind =
F.getFnAttribute(
"interrupt").getValueAsString();
2886 if (IntKind ==
"" || IntKind ==
"IRQ" || IntKind ==
"FIQ" ||
2889 else if (IntKind ==
"SWI" || IntKind ==
"UNDEF")
2893 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2898 return DAG.
getNode(ARMISD::INTRET_GLUE,
DL, MVT::Other, RetOps);
2920 bool isLittleEndian = Subtarget->isLittle();
2923 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
2932 "secure entry function would return value through pointer",
2937 for (
unsigned i = 0, realRVLocIdx = 0;
2939 ++i, ++realRVLocIdx) {
2940 CCValAssign &VA = RVLocs[i];
2943 SDValue Arg = OutVals[realRVLocIdx];
2944 bool ReturnF16 =
false;
2946 if (Subtarget->hasFullFP16() &&
getTM().isTargetHardFloat()) {
2979 auto RetVT = Outs[realRVLocIdx].ArgVT;
3001 DAG.
getVTList(MVT::i32, MVT::i32), Half);
3005 HalfGPRs.
getValue(isLittleEndian ? 0 : 1), Glue);
3011 HalfGPRs.
getValue(isLittleEndian ? 1 : 0), Glue);
3023 DAG.
getVTList(MVT::i32, MVT::i32), Arg);
3025 fmrrd.
getValue(isLittleEndian ? 0 : 1), Glue);
3030 fmrrd.
getValue(isLittleEndian ? 1 : 0), Glue);
3040 const ARMBaseRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3066 !Subtarget->isMClass()) {
3067 if (Subtarget->isThumb1Only())
3074 return DAG.
getNode(RetNode, dl, MVT::Other, RetOps);
3077bool ARMTargetLowering::isUsedByReturnOnly(
SDNode *
N,
SDValue &Chain)
const {
3078 if (
N->getNumValues() != 1)
3080 if (!
N->hasNUsesOfValue(1, 0))
3084 SDNode *
Copy = *
N->user_begin();
3088 if (
Copy->getOperand(
Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3090 TCChain =
Copy->getOperand(0);
3091 }
else if (
Copy->getOpcode() == ARMISD::VMOVRRD) {
3092 SDNode *VMov =
Copy;
3094 SmallPtrSet<SDNode*, 2>
Copies;
3095 for (SDNode *U : VMov->
users()) {
3103 for (SDNode *U : VMov->
users()) {
3104 SDValue UseChain =
U->getOperand(0);
3112 if (
U->getOperand(
U->getNumOperands() - 1).getValueType() == MVT::Glue)
3120 if (!
Copy->hasOneUse())
3127 if (
Copy->getOperand(
Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3129 TCChain =
Copy->getOperand(0);
3134 bool HasRet =
false;
3135 for (
const SDNode *U :
Copy->users()) {
3136 if (
U->getOpcode() != ARMISD::RET_GLUE &&
3137 U->getOpcode() != ARMISD::INTRET_GLUE)
3149bool ARMTargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
3150 if (!Subtarget->supportsTailCall())
3167 &&
"LowerWRITE_REGISTER called for non-i64 type argument.");
3183 EVT PtrVT =
Op.getValueType();
3193 if (Subtarget->genExecuteOnly()) {
3198 auto GV =
new GlobalVariable(
3204 return LowerGlobalAddress(GA, DAG);
3210 if (Subtarget->isThumb1Only())
3211 CPAlign = std::max(CPAlign,
Align(4));
3217 return DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3224 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3233 unsigned ARMPCLabelIndex = 0;
3239 if (!IsPositionIndependent) {
3242 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3249 CPAddr = DAG.
getNode(ARMISD::Wrapper,
DL, PtrVT, CPAddr);
3253 if (!IsPositionIndependent)
3256 return DAG.
getNode(ARMISD::PIC_ADD,
DL, PtrVT, Result, PICLabel);
3284ARMTargetLowering::LowerGlobalTLSAddressDarwin(
SDValue Op,
3287 "This function expects a Darwin target");
3292 SDValue DescAddr = LowerGlobalAddressDarwin(
Op, DAG);
3298 MVT::i32,
DL, Chain, DescAddr,
3313 auto ARI =
static_cast<const ARMRegisterInfo *
>(
TRI);
3322 Chain, FuncTLVGet, DAG.
getRegister(ARM::R0, MVT::i32),
3328ARMTargetLowering::LowerGlobalTLSAddressWindows(
SDValue Op,
3331 "Windows specific TLS lowering");
3355 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
3363 TLSIndex = DAG.
getNode(ARMISD::Wrapper,
DL, PtrVT, TLSIndex);
3364 TLSIndex = DAG.
getLoad(PtrVT,
DL, Chain, TLSIndex, MachinePointerInfo());
3370 MachinePointerInfo());
3377 DAG.
getNode(ARMISD::Wrapper,
DL, MVT::i32,
3390 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3392 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
3394 ARMConstantPoolValue *CPV =
3405 Argument = DAG.
getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3412 TargetLowering::CallLoweringInfo CLI(DAG);
3417 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
3418 return CallResult.first;
3427 const GlobalValue *GV = GA->
getGlobal();
3433 SDValue ThreadPointer = DAG.
getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3437 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
3440 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3441 ARMConstantPoolValue *CPV =
3448 PtrVT, dl, Chain,
Offset,
3456 PtrVT, dl, Chain,
Offset,
3461 ARMConstantPoolValue *CPV =
3466 PtrVT, dl, Chain,
Offset,
3482 if (
TT.isOSDarwin())
3483 return LowerGlobalTLSAddressDarwin(
Op, DAG);
3485 if (
TT.isOSWindows())
3486 return LowerGlobalTLSAddressWindows(
Op, DAG);
3489 assert(
TT.isOSBinFormatELF() &&
"Only ELF implemented here");
3495 return LowerToTLSGeneralDynamicModel(GA, DAG);
3498 return LowerToTLSExecModels(GA, DAG, model);
3507 while (!Worklist.
empty()) {
3515 if (!
I ||
I->getParent()->getParent() !=
F)
3544 if (!GVar || !GVar->hasInitializer() ||
3545 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3546 !GVar->hasLocalLinkage())
3551 auto *
Init = GVar->getInitializer();
3553 Init->needsDynamicRelocation())
3565 unsigned RequiredPadding = 4 - (
Size % 4);
3566 bool PaddingPossible =
3567 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3572 unsigned PaddedSize =
Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3596 if (RequiredPadding != 4) {
3601 while (RequiredPadding--)
3613 ++NumConstpoolPromoted;
3614 return DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3619 if (!(GV = GA->getAliaseeObject()))
3622 return V->isConstant();
3631 return LowerGlobalAddressWindows(
Op, DAG);
3633 return LowerGlobalAddressELF(
Op, DAG);
3635 return LowerGlobalAddressDarwin(
Op, DAG);
3647 if (GV->
isDSOLocal() && !Subtarget->genExecuteOnly())
3660 }
else if (Subtarget->isROPI() && IsRO) {
3665 }
else if (Subtarget->isRWPI() && !IsRO) {
3668 if (Subtarget->useMovt()) {
3671 RelAddr = DAG.
getNode(ARMISD::Wrapper, dl, PtrVT,
G);
3673 ARMConstantPoolValue *CPV =
3676 CPAddr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3690 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3691 if (Subtarget->useMovt())
3695 return DAG.
getNode(ARMISD::Wrapper, dl, PtrVT,
3699 CPAddr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3708 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3709 "ROPI/RWPI not currently supported for Darwin");
3714 if (Subtarget->useMovt())
3725 if (Subtarget->isGVIndirectSymbol(GV))
3734 "non-Windows COFF is not supported");
3735 assert(Subtarget->useMovt() &&
3736 "Windows on ARM expects to use movw/movt");
3737 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3738 "ROPI/RWPI not currently supported for Windows");
3745 else if (!TM.shouldAssumeDSOLocal(GV))
3768 return DAG.
getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3769 DAG.
getVTList(MVT::i32, MVT::Other),
Op.getOperand(0),
3770 Op.getOperand(1), Val);
3776 return DAG.
getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
Op.getOperand(0),
3783 return DAG.
getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3787SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3790 Op.getConstantOperandVal(
Op.getOperand(0).getValueType() == MVT::Other);
3794 case Intrinsic::arm_gnu_eabi_mcount: {
3800 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3801 const uint32_t *
Mask =
3803 assert(Mask &&
"Missing call preserved mask for calling convention");
3808 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
3812 if (Subtarget->isThumb())
3815 ARM::tBL_PUSHLR, dl, ResultTys,
3816 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3817 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3821 {ReturnAddress, Callee, RegisterMask, Chain}),
3830 unsigned IntNo =
Op.getConstantOperandVal(0);
3834 case Intrinsic::thread_pointer: {
3836 return DAG.
getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3838 case Intrinsic::arm_cls: {
3842 const SDValue &Operand =
Op.getOperand(1);
3843 const EVT VTy =
Op.getValueType();
3846 case Intrinsic::arm_cls64: {
3852 case Intrinsic::arm_neon_vcls:
3853 case Intrinsic::arm_mve_vcls: {
3856 const EVT VTy =
Op.getValueType();
3859 case Intrinsic::eh_sjlj_lsda: {
3861 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
3866 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3867 ARMConstantPoolValue *CPV =
3871 CPAddr = DAG.
getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3876 if (IsPositionIndependent) {
3878 Result = DAG.
getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3882 case Intrinsic::arm_neon_vabs:
3885 case Intrinsic::arm_neon_vabds:
3886 if (
Op.getValueType().isInteger())
3888 Op.getOperand(1),
Op.getOperand(2));
3890 case Intrinsic::arm_neon_vabdu:
3892 Op.getOperand(1),
Op.getOperand(2));
3893 case Intrinsic::arm_neon_vmulls:
3894 case Intrinsic::arm_neon_vmullu: {
3895 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3896 ? ARMISD::VMULLs : ARMISD::VMULLu;
3897 return DAG.
getNode(NewOpc, SDLoc(
Op),
Op.getValueType(),
3898 Op.getOperand(1),
Op.getOperand(2));
3900 case Intrinsic::arm_neon_vminnm:
3901 case Intrinsic::arm_neon_vmaxnm: {
3902 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3904 return DAG.
getNode(NewOpc, SDLoc(
Op),
Op.getValueType(),
3905 Op.getOperand(1),
Op.getOperand(2));
3907 case Intrinsic::arm_neon_vminu:
3908 case Intrinsic::arm_neon_vmaxu: {
3909 if (
Op.getValueType().isFloatingPoint())
3911 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3913 return DAG.
getNode(NewOpc, SDLoc(
Op),
Op.getValueType(),
3914 Op.getOperand(1),
Op.getOperand(2));
3916 case Intrinsic::arm_neon_vmins:
3917 case Intrinsic::arm_neon_vmaxs: {
3919 if (!
Op.getValueType().isFloatingPoint()) {
3920 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3922 return DAG.
getNode(NewOpc, SDLoc(
Op),
Op.getValueType(),
3923 Op.getOperand(1),
Op.getOperand(2));
3925 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3927 return DAG.
getNode(NewOpc, SDLoc(
Op),
Op.getValueType(),
3928 Op.getOperand(1),
Op.getOperand(2));
3930 case Intrinsic::arm_neon_vtbl1:
3931 return DAG.
getNode(ARMISD::VTBL1, SDLoc(
Op),
Op.getValueType(),
3932 Op.getOperand(1),
Op.getOperand(2));
3933 case Intrinsic::arm_neon_vtbl2:
3934 return DAG.
getNode(ARMISD::VTBL2, SDLoc(
Op),
Op.getValueType(),
3935 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3936 case Intrinsic::arm_mve_pred_i2v:
3937 case Intrinsic::arm_mve_pred_v2i:
3938 return DAG.
getNode(ARMISD::PREDICATE_CAST, SDLoc(
Op),
Op.getValueType(),
3940 case Intrinsic::arm_mve_vreinterpretq:
3941 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, SDLoc(
Op),
Op.getValueType(),
3943 case Intrinsic::arm_mve_lsll:
3944 return DAG.
getNode(ARMISD::LSLL, SDLoc(
Op),
Op->getVTList(),
3945 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3946 case Intrinsic::arm_mve_asrl:
3947 return DAG.
getNode(ARMISD::ASRL, SDLoc(
Op),
Op->getVTList(),
3948 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3949 case Intrinsic::arm_mve_vsli:
3950 return DAG.
getNode(ARMISD::VSLIIMM, SDLoc(
Op),
Op->getVTList(),
3951 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3952 case Intrinsic::arm_mve_vsri:
3953 return DAG.
getNode(ARMISD::VSRIIMM, SDLoc(
Op),
Op->getVTList(),
3954 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3965 if (!Subtarget->hasDataBarrier()) {
3969 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3970 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3971 return DAG.
getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other,
Op.getOperand(0),
3981 }
else if (Subtarget->preferISHSTBarriers() &&
3990 DAG.
getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3998 (!Subtarget->
isThumb1Only() && Subtarget->hasV5TEOps())))
4000 return Op.getOperand(0);
4003 unsigned isRead =
~Op.getConstantOperandVal(2) & 1;
4005 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4007 return Op.getOperand(0);
4009 unsigned isData =
Op.getConstantOperandVal(4);
4010 if (Subtarget->isThumb()) {
4012 isRead = ~isRead & 1;
4013 isData = ~isData & 1;
4016 return DAG.
getNode(ARMISD::PRELOAD, dl, MVT::Other,
Op.getOperand(0),
4031 return DAG.
getStore(
Op.getOperand(0), dl, FR,
Op.getOperand(1),
4039 const SDLoc &dl)
const {
4041 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
4043 const TargetRegisterClass *RC;
4045 RC = &ARM::tGPRRegClass;
4047 RC = &ARM::GPRRegClass;
4061 MVT::i32, dl, Root, FIN,
4067 if (!Subtarget->isLittle())
4069 return DAG.
getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4082 const Value *OrigArg,
4083 unsigned InRegsParamRecordIdx,
4084 int ArgOffset,
unsigned ArgSize)
const {
4098 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
4099 unsigned RBegin, REnd;
4104 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 :
GPRArgRegs[RBeginIdx];
4109 ArgOffset = -4 * (ARM::R4 - RBegin);
4116 const TargetRegisterClass *RC =
4119 for (
unsigned Reg = RBegin, i = 0;
Reg < REnd; ++
Reg, ++i) {
4123 MachinePointerInfo(OrigArg, 4 * i));
4128 if (!MemOps.
empty())
4137 unsigned TotalArgRegsSaveSize,
4138 bool ForceMutable)
const {
4140 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
4149 CCInfo.
getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4153bool ARMTargetLowering::splitValueIntoRegisterParts(
4155 unsigned NumParts,
MVT PartVT, std::optional<CallingConv::ID> CC)
const {
4157 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4169SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4171 MVT PartVT,
EVT ValueVT, std::optional<CallingConv::ID> CC)
const {
4172 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4185SDValue ARMTargetLowering::LowerFormalArguments(
4192 ARMFunctionInfo *AFI = MF.
getInfo<ARMFunctionInfo>();
4201 unsigned CurArgIdx = 0;
4213 unsigned ArgRegBegin = ARM::R4;
4214 for (
const CCValAssign &VA : ArgLocs) {
4220 if (!
Flags.isByVal())
4224 unsigned RBegin, REnd;
4226 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4232 int lastInsIndex = -1;
4236 ArgRegBegin = std::min(ArgRegBegin, (
unsigned)
GPRArgRegs[RegIdx]);
4239 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4243 for (
unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4244 CCValAssign &VA = ArgLocs[i];
4245 if (Ins[VA.
getValNo()].isOrigArg()) {
4246 std::advance(CurOrigArg,
4247 Ins[VA.
getValNo()].getOrigArgIndex() - CurArgIdx);
4248 CurArgIdx = Ins[VA.
getValNo()].getOrigArgIndex();
4259 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4266 MVT::f64, dl, Chain, FIN,
4269 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4277 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4279 const TargetRegisterClass *RC;
4281 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4282 RC = &ARM::HPRRegClass;
4283 else if (RegVT == MVT::f32)
4284 RC = &ARM::SPRRegClass;
4285 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4286 RegVT == MVT::v4bf16)
4287 RC = &ARM::DPRRegClass;
4288 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4289 RegVT == MVT::v8bf16)
4290 RC = &ARM::QPRRegClass;
4291 else if (RegVT == MVT::i32)
4293 : &ARM::GPRRegClass;
4330 const ISD::InputArg &Arg = Ins[VA.
getValNo()];
4339 assert(VA.
getValVT() != MVT::i64 &&
"i64 should already be lowered");
4345 if (index != lastInsIndex)
4347 ISD::ArgFlagsTy
Flags = Ins[index].Flags;
4353 if (
Flags.isByVal()) {
4354 assert(Ins[index].isOrigArg() &&
4355 "Byval arguments cannot be implicit");
4359 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4393 lastInsIndex = index;
4400 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.
getStackSize(),
4401 TotalArgRegsSaveSize);
4405 "secure entry function must not be variadic", dl.
getDebugLoc()));
4415 assert(StackAlign &&
"data layout string is missing stack alignment");
4416 StackArgSize =
alignTo(StackArgSize, *StackAlign);
4425 "secure entry function requires arguments on stack", dl.
getDebugLoc()));
4434 return CFP->getValueAPF().isPosZero();
4437 if (
Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4438 SDValue WrapperOp =
Op.getOperand(1).getOperand(0);
4441 return CFP->getValueAPF().isPosZero();
4444 Op->getValueType(0) == MVT::f64) {
4448 if (BitcastOp->
getOpcode() == ARMISD::VMOVIMM &&
4457 if (
Op->getFlags().hasNoSignedWrap())
4473 (isIntEqualitySetCC(CC) ||
4482 const SDLoc &dl)
const {
4484 unsigned C = RHSC->getZExtValue();
4548 if (Subtarget->isThumb1Only() &&
LHS->getOpcode() ==
ISD::AND &&
4552 unsigned Mask =
LHS.getConstantOperandVal(1);
4554 uint64_t RHSV = RHSC->getZExtValue();
4555 if (
isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4557 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4571 if (Subtarget->isThumb1Only() &&
LHS->getOpcode() ==
ISD::SHL &&
4574 LHS.getConstantOperandVal(1) < 31) {
4575 unsigned ShiftAmt =
LHS.getConstantOperandVal(1) + 1;
4600 unsigned CompareType;
4603 CompareType = ARMISD::CMP;
4608 CompareType = ARMISD::CMPZ;
4617 if (CompareType != ARMISD::CMPZ &&
isCMN(
RHS, CC, DAG)) {
4618 CompareType = ARMISD::CMN;
4620 }
else if (CompareType != ARMISD::CMPZ &&
isCMN(
LHS, CC, DAG)) {
4621 CompareType = ARMISD::CMN;
4633 bool Signaling)
const {
4634 assert(Subtarget->hasFP64() ||
RHS.getValueType() != MVT::f64);
4640 Flags = DAG.
getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4649std::pair<SDValue, SDValue>
4652 assert(
Op.getValueType() == MVT::i32 &&
"Unsupported value type");
4664 switch (
Op.getOpcode()) {
4716 return std::make_pair(
Value, OverflowCmp);
4729 return Cmp.getValue(1);
4757 return DAG.
getNode(ARMISD::CMOV,
DL, VT, Zero, One, ARMcc, Flags);
4769 EVT VT =
Op.getValueType();
4770 SDVTList VTs = DAG.
getVTList(VT, MVT::i32);
4773 switch (
Op.getOpcode()) {
4787 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Op, DAG, ARMcc);
4793 DAG.
getNode(ARMISD::CMOV, dl, MVT::i32,
4796 ARMcc, OverflowCmp);
4806 EVT VT =
Op.getValueType();
4807 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->
isThumb1Only())
4817 switch (
Op->getOpcode()) {
4819 NewOpcode = ARMISD::UQADD8b;
4822 NewOpcode = ARMISD::QADD8b;
4825 NewOpcode = ARMISD::UQSUB8b;
4828 NewOpcode = ARMISD::QSUB8b;
4833 switch (
Op->getOpcode()) {
4835 NewOpcode = ARMISD::UQADD16b;
4838 NewOpcode = ARMISD::QADD16b;
4841 NewOpcode = ARMISD::UQSUB16b;
4844 NewOpcode = ARMISD::QSUB16b;
4852 DAG.
getNode(NewOpcode, dl, MVT::i32,
4863 unsigned Opc =
Cond.getOpcode();
4865 if (
Cond.getResNo() == 1 &&
4873 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Cond, DAG, ARMcc);
4874 EVT VT =
Op.getValueType();
4876 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
4884 if (
Cond.getOpcode() == ARMISD::CMOV &&
Cond.hasOneUse()) {
4885 const ConstantSDNode *CMOVTrue =
4887 const ConstantSDNode *CMOVFalse =
4890 if (CMOVTrue && CMOVFalse) {
4896 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4898 False = SelectFalse;
4899 }
else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4905 return getCMOV(dl,
Op.getValueType(), True, False,
Cond.getOperand(2),
4906 Cond.getOperand(3), DAG);
4916 bool &swpCmpOps,
bool &swpVselOps) {
4944 swpCmpOps = !swpCmpOps;
4945 swpVselOps = !swpVselOps;
4968 if (!Subtarget->hasFP64() && VT == MVT::f64) {
4970 DAG.
getVTList(MVT::i32, MVT::i32), FalseVal);
4972 DAG.
getVTList(MVT::i32, MVT::i32), TrueVal);
4986 return DAG.
getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
5007 ((K ==
LHS && K == TrueVal) || (K ==
RHS && K == FalseVal))) ||
5009 ((K ==
RHS && K == TrueVal) || (K ==
LHS && K == FalseVal)));
5030 EVT VT =
Op.getValueType();
5052 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5065 int64_t PosVal = std::max(Val1, Val2);
5066 int64_t NegVal = std::min(Val1, Val2);
5078 return DAG.
getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5081 return DAG.
getNode(ARMISD::USAT, dl, VT, V2Tmp,
5113 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5118 if (*K != KTmp || V != VTmp)
5129bool ARMTargetLowering::isUnsupportedFloatingType(
EVT VT)
const {
5131 return !Subtarget->hasVFP2Base();
5133 return !Subtarget->hasFP64();
5135 return !Subtarget->hasFullFP16();
5143 if (!CFVal || !CTVal || !Subtarget->hasV8_1MMainlineOps())
5151 if (TVal == ~FVal) {
5152 Opcode = ARMISD::CSINV;
5153 }
else if (TVal == ~FVal + 1) {
5154 Opcode = ARMISD::CSNEG;
5155 }
else if (TVal + 1 == FVal) {
5156 Opcode = ARMISD::CSINC;
5157 }
else if (TVal == FVal + 1) {
5158 Opcode = ARMISD::CSINC;
5161 InvertCond = !InvertCond;
5168 if (Opcode != ARMISD::CSINC &&
5172 InvertCond = !InvertCond;
5178 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5181 InvertCond = !InvertCond;
5188 EVT VT =
Op.getValueType();
5192 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5204 if (VT == MVT::i32 &&
5223 if (
Op.getValueType().isInteger()) {
5231 LHS.getValueType() ==
RHS.getValueType()) {
5232 EVT VT =
LHS.getValueType();
5238 Shift = DAG.
getNOT(dl, Shift, VT);
5250 if (
LHS.getValueType() == MVT::i32) {
5254 matchCSET(Opcode, InvertCond, TrueVal, FalseVal, Subtarget)) {
5260 EVT VT =
Op.getValueType();
5261 return DAG.
getNode(Opcode, dl, VT,
Op,
Op, ARMcc, Cmp);
5265 if (isUnsupportedFloatingType(
LHS.getValueType())) {
5270 if (!
RHS.getNode()) {
5276 if (
LHS.getValueType() == MVT::i32) {
5287 if (Subtarget->hasFPARMv8Base() && (
TrueVal.getValueType() == MVT::f16 ||
5288 TrueVal.getValueType() == MVT::f32 ||
5289 TrueVal.getValueType() == MVT::f64)) {
5303 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5313 if (Subtarget->hasFPARMv8Base() &&
5315 (
TrueVal.getValueType() == MVT::f16 ||
5316 TrueVal.getValueType() == MVT::f32 ||
5317 TrueVal.getValueType() == MVT::f64)) {
5318 bool swpCmpOps =
false;
5319 bool swpVselOps =
false;
5333 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5336 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5346 if (!
N->hasOneUse())
5349 if (!
N->getNumValues())
5351 EVT VT =
Op.getValueType();
5352 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5369 return DAG.
getLoad(MVT::i32,
SDLoc(
Op), Ld->getChain(), Ld->getBasePtr(),
5370 Ld->getPointerInfo(), Ld->getAlign(),
5371 Ld->getMemOperand()->getFlags());
5387 SDValue Ptr = Ld->getBasePtr();
5389 DAG.
getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5390 Ld->getAlign(), Ld->getMemOperand()->
getFlags());
5395 RetVal2 = DAG.
getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5396 Ld->getPointerInfo().getWithOffset(4),
5398 Ld->getMemOperand()->getFlags());
5416 bool LHSSeenZero =
false;
5418 bool RHSSeenZero =
false;
5420 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5431 if (
LHS.getValueType() == MVT::f32) {
5437 return DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5449 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5450 return DAG.
getNode(ARMISD::BCC_i64, dl, MVT::Other,
Ops);
5465 return DAG.
getNode(ARMISD::CMOV,
DL, MVT::i32,
Op.getOperand(0), Neg,
5484 unsigned Opc =
Cond.getOpcode();
5486 !Subtarget->isThumb1Only();
5487 if (
Cond.getResNo() == 1 &&
5497 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Cond, DAG, ARMcc);
5502 return DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5517 if (isUnsupportedFloatingType(
LHS.getValueType())) {
5522 if (!
RHS.getNode()) {
5530 unsigned Opc =
LHS.getOpcode();
5532 !Subtarget->isThumb1Only();
5544 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
LHS.getValue(0), DAG, ARMcc);
5551 return DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5555 if (
LHS.getValueType() == MVT::i32) {
5558 return DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5561 SDNodeFlags
Flags =
Op->getFlags();
5562 if (
Flags.hasNoNaNs() &&
5567 if (
SDValue Result = OptimizeVFPBrcond(
Op, DAG))
5581 Res = DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other,
Ops);
5595 Table = DAG.
getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5598 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5603 return DAG.
getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5604 Addr,
Op.getOperand(2), JTI);
5608 DAG.
getLoad((EVT)MVT::i32, dl, Chain, Addr,
5612 return DAG.
getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5615 DAG.
getLoad(PTy, dl, Chain, Addr,
5618 return DAG.
getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5623 EVT VT =
Op.getValueType();
5626 if (
Op.getValueType().getVectorElementType() == MVT::i32) {
5627 if (
Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5635 const EVT OpTy =
Op.getOperand(0).getValueType();
5636 if (OpTy == MVT::v4f32)
5638 else if (OpTy == MVT::v4f16 && HasFullFP16)
5640 else if (OpTy == MVT::v8f16 && HasFullFP16)
5645 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5648 Op = DAG.
getNode(
Op.getOpcode(), dl, NewTy,
Op.getOperand(0));
5653 EVT VT =
Op.getValueType();
5657 bool IsStrict =
Op->isStrictFPOpcode();
5658 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5660 if (isUnsupportedFloatingType(SrcVal.
getValueType())) {
5673 std::tie(Result, Chain) =
makeLibCall(DAG, LC,
Op.getValueType(), SrcVal,
5674 CallOptions, Loc, Chain);
5684 Loc,
Op.getValueType(), SrcVal);
5693 EVT VT =
Op.getValueType();
5695 EVT FromVT =
Op.getOperand(0).getValueType();
5697 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5699 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5700 Subtarget->hasFP64())
5702 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5703 Subtarget->hasFullFP16())
5705 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5706 Subtarget->hasMVEFloatOps())
5708 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5709 Subtarget->hasMVEFloatOps())
5712 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5729 EVT VT =
Op.getValueType();
5732 if (
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5738 assert((
Op.getOperand(0).getValueType() == MVT::v4i16 ||
5739 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5740 "Invalid type for custom lowering!");
5745 if (VT == MVT::v4f32)
5746 DestVecType = MVT::v4i32;
5747 else if (VT == MVT::v4f16 && HasFullFP16)
5748 DestVecType = MVT::v4i16;
5749 else if (VT == MVT::v8f16 && HasFullFP16)
5750 DestVecType = MVT::v8i16;
5756 switch (
Op.getOpcode()) {
5768 Op = DAG.
getNode(CastOpc, dl, DestVecType,
Op.getOperand(0));
5773 EVT VT =
Op.getValueType();
5776 if (isUnsupportedFloatingType(VT)) {
5786 CallOptions, SDLoc(
Op)).first;
5797 EVT VT =
Op.getValueType();
5801 bool UseNEON = !InGPR && Subtarget->hasNEON();
5808 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5815 if (SrcVT == MVT::f32) {
5818 Tmp1 = DAG.
getNode(ARMISD::VSHLIMM, dl, OpVT,
5821 }
else if (VT == MVT::f32)
5822 Tmp1 = DAG.
getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5837 if (VT == MVT::f32) {
5849 if (SrcVT == MVT::f64)
5858 if (VT == MVT::f32) {
5871 return DAG.
getNode(ARMISD::VMOVDRR, dl, MVT::f64,
Lo,
Hi);
5879 EVT VT =
Op.getValueType();
5881 unsigned Depth =
Op.getConstantOperandVal(0);
5883 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
5887 MachinePointerInfo());
5896 const ARMBaseRegisterInfo &ARI =
5897 *
static_cast<const ARMBaseRegisterInfo*
>(RegInfo);
5902 EVT VT =
Op.getValueType();
5904 unsigned Depth =
Op.getConstantOperandVal(0);
5909 MachinePointerInfo());
5917 return StringSwitch<Register>(
RegName)
5918 .Case(
"sp", ARM::SP)
5929 assert(
N->getValueType(0) == MVT::i64
5930 &&
"ExpandREAD_REGISTER called for non-i64 type result.");
5933 DAG.
getVTList(MVT::i32, MVT::i32, MVT::Other),
5973 const APInt &APIntIndex = Index->getAPIntValue();
5975 NewIndex *= APIntIndex;
6004 EVT SrcVT =
Op.getValueType();
6005 EVT DstVT =
N->getValueType(0);
6007 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6008 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6009 return MoveToHPR(SDLoc(
N), DAG, MVT::i32, DstVT.
getSimpleVT(),
6012 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6013 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
6014 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
6021 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6033 DAG.
getNode(ARMISD::VMOVDRR, dl, MVT::f64,
Lo,
Hi));
6041 Cvt = DAG.
getNode(ARMISD::VMOVRRD, dl,
6043 DAG.
getNode(ARMISD::VREV64, dl, SrcVT,
Op));
6045 Cvt = DAG.
getNode(ARMISD::VMOVRRD, dl,
6065 SDValue Vmov = DAG.
getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6074 EVT VT =
Op.getValueType();
6096 DAG.
getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6106 DAG.
getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6117 EVT VT =
Op.getValueType();
6138 DAG.
getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6159 DAG.
getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6211 Chain, DAG.
getConstant(Intrinsic::arm_set_fpscr,
DL, MVT::i32), FPSCR};
6239 Chain, DAG.
getConstant(Intrinsic::arm_set_fpscr,
DL, MVT::i32), FPSCR};
6269 EVT VT =
N->getValueType(0);
6270 if (VT.
isVector() && ST->hasNEON()) {
6279 if (ElemTy == MVT::i8) {
6287 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6290 unsigned NumBits = ElemTy.getSizeInBits();
6292 DAG.
getNode(ARMISD::VMOVIMM, dl, VT,
6302 if (ElemTy == MVT::i64) {
6315 if (!ST->hasV6T2Ops())
6324 EVT VT =
N->getValueType(0);
6327 assert(ST->hasNEON() &&
"Custom ctpop lowering requires NEON.");
6328 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6329 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6330 "Unexpected type for custom ctpop lowering");
6338 unsigned EltSize = 8;
6361 Op =
Op.getOperand(0);
6363 APInt SplatBits, SplatUndef;
6364 unsigned SplatBitSize;
6367 !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6369 SplatBitSize > ElementBits)
6380 assert(VT.
isVector() &&
"vector shift count is not a vector type");
6384 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6395 assert(VT.
isVector() &&
"vector shift count is not a vector type");
6400 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6401 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6410 EVT VT =
N->getValueType(0);
6425 return DAG.
getNode(ARMISD::VSHLIMM, dl, VT,
N->getOperand(0),
6427 return DAG.
getNode(ARMISD::VSHLu, dl, VT,
N->getOperand(0),
6432 "unexpected vector shift opcode");
6434 if (
isVShiftRImm(
N->getOperand(1), VT,
false,
false, Cnt)) {
6435 unsigned VShiftOpc =
6436 (
N->getOpcode() ==
ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6437 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0),
6443 EVT ShiftVT =
N->getOperand(1).getValueType();
6446 unsigned VShiftOpc =
6447 (
N->getOpcode() ==
ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6448 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0), NegatedCount);
6453 EVT VT =
N->getValueType(0);
6462 "Unknown shift to lower!");
6464 unsigned ShOpc =
N->getOpcode();
6465 if (ST->hasMVEIntegerOps()) {
6467 unsigned ShPartsOpc = ARMISD::LSLL;
6488 ShPartsOpc = ARMISD::LSRL;
6490 ShPartsOpc = ARMISD::ASRL;
6495 DAG.
SplitScalar(
N->getOperand(0), dl, MVT::i32, MVT::i32);
6509 if (ST->isThumb1Only())
6514 std::tie(
Lo,
Hi) = DAG.
SplitScalar(
N->getOperand(0), dl, MVT::i32, MVT::i32);
6518 unsigned Opc =
N->getOpcode() ==
ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6522 Lo = DAG.
getNode(ARMISD::RRX, dl, MVT::i32,
Lo,
Hi.getValue(1));
6530 bool Invert =
false;
6537 EVT VT =
Op.getValueType();
6545 assert(ST->hasMVEIntegerOps() &&
6546 "No hardware support for integer vector comparison!");
6548 if (
Op.getValueType().getVectorElementType() != MVT::i1)
6569 SDValue Reversed = DAG.
getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6573 Merged = DAG.
getNOT(dl, Merged, CmpVT);
6583 switch (SetCCOpcode) {
6587 if (ST->hasMVEFloatOps()) {
6590 Invert =
true; [[fallthrough]];
6595 case ISD::SETLT: Swap =
true; [[fallthrough]];
6599 case ISD::SETLE: Swap =
true; [[fallthrough]];
6615 Result = DAG.
getNOT(dl, Result, VT);
6618 case ISD::SETUO: Invert =
true; [[fallthrough]];
6627 Result = DAG.
getNOT(dl, Result, VT);
6633 switch (SetCCOpcode) {
6636 if (ST->hasMVEIntegerOps()) {
6639 Invert =
true; [[fallthrough]];
6642 case ISD::SETLT: Swap =
true; [[fallthrough]];
6644 case ISD::SETLE: Swap =
true; [[fallthrough]];
6661 if (AndOp.getNode() && AndOp.getOpcode() ==
ISD::BITCAST)
6664 if (AndOp.getNode() && AndOp.getOpcode() ==
ISD::AND) {
6669 Result = DAG.
getNOT(dl, Result, VT);
6694 Result = DAG.
getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6697 Result = DAG.
getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6703 Result = DAG.
getNOT(dl, Result, VT);
6712 assert(
LHS.getSimpleValueType().isInteger() &&
"SETCCCARRY is integer only.");
6729 return DAG.
getNode(ARMISD::CMOV,
DL,
Op.getValueType(), FVal, TVal, ARMcc,
6740 unsigned OpCmode, Imm;
6751 switch (SplatBitSize) {
6756 assert((SplatBits & ~0xff) == 0 &&
"one byte splat value is too big");
6759 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6764 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6765 if ((SplatBits & ~0xff) == 0) {
6771 if ((SplatBits & ~0xff00) == 0) {
6774 Imm = SplatBits >> 8;
6784 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6785 if ((SplatBits & ~0xff) == 0) {
6791 if ((SplatBits & ~0xff00) == 0) {
6794 Imm = SplatBits >> 8;
6797 if ((SplatBits & ~0xff0000) == 0) {
6800 Imm = SplatBits >> 16;
6803 if ((SplatBits & ~0xff000000) == 0) {
6806 Imm = SplatBits >> 24;
6813 if ((SplatBits & ~0xffff) == 0 &&
6814 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6817 Imm = SplatBits >> 8;
6825 if ((SplatBits & ~0xffffff) == 0 &&
6826 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6829 Imm = SplatBits >> 16;
6845 unsigned ImmMask = 1;
6847 for (
int ByteNum = 0; ByteNum < 8; ++ByteNum) {
6848 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
6850 }
else if ((SplatBits & BitMask) != 0) {
6859 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
6873 EVT VT =
Op.getValueType();
6874 bool IsDouble = (VT == MVT::f64);
6880 if (
ST->genExecuteOnly()) {
6882 assert((!
ST->isThumb1Only() ||
ST->hasV8MBaselineOps()) &&
6883 "Unexpected architecture");
6901 return DAG.
getNode(ARMISD::VMOVSR,
DL, VT,
6906 if (!
ST->hasVFP3Base())
6911 if (IsDouble && !Subtarget->hasFP64())
6918 if (IsDouble || !
ST->useNEONForSinglePrecisionFP()) {
6936 if (!
ST->hasNEON() || (!IsDouble && !
ST->useNEONForSinglePrecisionFP()))
6945 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
6999 unsigned ExpectedElt = Imm;
7000 for (
unsigned i = 1; i < NumElts; ++i) {
7004 if (ExpectedElt == NumElts)
7007 if (M[i] < 0)
continue;
7008 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
7016 bool &ReverseVEXT,
unsigned &Imm) {
7018 ReverseVEXT =
false;
7029 unsigned ExpectedElt = Imm;
7030 for (
unsigned i = 1; i < NumElts; ++i) {
7034 if (ExpectedElt == NumElts * 2) {
7039 if (M[i] < 0)
continue;
7040 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
7055 return VT == MVT::v8i8 && M.size() == 8;
7060 if (Mask.size() == Elements * 2)
7061 return Index / Elements;
7062 return Mask[Index] == 0 ? 0 : 1;
7092 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7100 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7102 for (
unsigned j = 0; j < NumElts; j += 2) {
7103 if ((M[i+j] >= 0 && (
unsigned) M[i+j] != j + WhichResult) ||
7104 (M[i+j+1] >= 0 && (
unsigned) M[i+j+1] != j + NumElts + WhichResult))
7109 if (M.size() == NumElts*2)
7124 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7127 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7129 for (
unsigned j = 0; j < NumElts; j += 2) {
7130 if ((M[i+j] >= 0 && (
unsigned) M[i+j] != j + WhichResult) ||
7131 (M[i+j+1] >= 0 && (
unsigned) M[i+j+1] != j + WhichResult))
7136 if (M.size() == NumElts*2)
7156 if (M.size() != NumElts && M.size() != NumElts*2)
7159 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7161 for (
unsigned j = 0; j < NumElts; ++j) {
7162 if (M[i+j] >= 0 && (
unsigned) M[i+j] != 2 * j + WhichResult)
7167 if (M.size() == NumElts*2)
7186 if (M.size() != NumElts && M.size() != NumElts*2)
7189 unsigned Half = NumElts / 2;
7190 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7192 for (
unsigned j = 0; j < NumElts; j += Half) {
7193 unsigned Idx = WhichResult;
7194 for (
unsigned k = 0; k < Half; ++k) {
7195 int MIdx = M[i + j + k];
7196 if (MIdx >= 0 && (
unsigned) MIdx != Idx)
7203 if (M.size() == NumElts*2)
7227 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7230 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7232 unsigned Idx = WhichResult * NumElts / 2;
7233 for (
unsigned j = 0; j < NumElts; j += 2) {
7234 if ((M[i+j] >= 0 && (
unsigned) M[i+j] != Idx) ||
7235 (M[i+j+1] >= 0 && (
unsigned) M[i+j+1] != Idx + NumElts))
7241 if (M.size() == NumElts*2)
7260 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7263 for (
unsigned i = 0; i < M.size(); i += NumElts) {
7265 unsigned Idx = WhichResult * NumElts / 2;
7266 for (
unsigned j = 0; j < NumElts; j += 2) {
7267 if ((M[i+j] >= 0 && (
unsigned) M[i+j] != Idx) ||
7268 (M[i+j+1] >= 0 && (
unsigned) M[i+j+1] != Idx))
7274 if (M.size() == NumElts*2)
7287 unsigned &WhichResult,
7290 if (
isVTRNMask(ShuffleMask, VT, WhichResult))
7291 return ARMISD::VTRN;
7292 if (
isVUZPMask(ShuffleMask, VT, WhichResult))
7293 return ARMISD::VUZP;
7294 if (
isVZIPMask(ShuffleMask, VT, WhichResult))
7295 return ARMISD::VZIP;
7299 return ARMISD::VTRN;
7301 return ARMISD::VUZP;
7303 return ARMISD::VZIP;
7312 if (NumElts != M.size())
7316 for (
unsigned i = 0; i != NumElts; ++i)
7317 if (M[i] >= 0 && M[i] != (
int) (NumElts - 1 - i))
7326 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7334 int Ofs = Top ? 1 : 0;
7335 int Upper = SingleSource ? 0 : NumElts;
7336 for (
int i = 0, e = NumElts / 2; i != e; ++i) {
7337 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7339 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs +
Upper)
7348 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7357 unsigned Offset = Top ? 0 : 1;
7358 unsigned N = SingleSource ? 0 : NumElts;
7359 for (
unsigned i = 0; i < NumElts; i += 2) {
7360 if (M[i] >= 0 && M[i] != (
int)i)
7362 if (M[i + 1] >= 0 && M[i + 1] != (
int)(
N + i +
Offset))
7371 if (NumElts != M.size())
7379 unsigned Off0 = rev ? NumElts / 2 : 0;
7380 unsigned Off1 = rev ? 0 : NumElts / 2;
7381 for (
unsigned i = 0; i < NumElts; i += 2) {
7382 if (M[i] >= 0 && M[i] != (
int)(Off0 + i / 2))
7384 if (M[i + 1] >= 0 && M[i + 1] != (
int)(Off1 + i / 2))
7400 if (!ST->hasMVEFloatOps())
7405 if (VT != MVT::v8f16)
7426 for (
unsigned i = 1; i < 4; i++) {
7441 return DAG.
getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7453 if (!ST->hasMVEFloatOps())
7458 if (VT != MVT::v4f32)
7474 for (
unsigned i = 1; i < 4; i++) {
7485 return DAG.
getNode(ARMISD::VCVTL, dl, VT, Op0,
7497 Val =
N->getAsZExtVal();
7499 if (ST->isThumb1Only()) {
7500 if (Val <= 255 || ~Val <= 255)
7512 EVT VT =
Op.getValueType();
7514 assert(ST->hasMVEIntegerOps() &&
"LowerBUILD_VECTOR_i1 called without MVE!");
7518 unsigned BitsPerBool;
7522 }
else if (NumElts == 4) {
7525 }
else if (NumElts == 8) {
7528 }
else if (NumElts == 16) {
7539 return U.get().isUndef() || U.get() == FirstOp;
7543 return DAG.
getNode(ARMISD::PREDICATE_CAST, dl,
Op.getValueType(), Ext);
7547 unsigned Bits32 = 0;
7548 for (
unsigned i = 0; i < NumElts; ++i) {
7552 bool BitSet = V.isUndef() ?
false : V->getAsZExtVal();
7554 Bits32 |= BoolMask << (i * BitsPerBool);
7560 for (
unsigned i = 0; i < NumElts; ++i) {
7573 if (!ST->hasMVEIntegerOps())
7577 EVT VT =
Op.getValueType();
7587 if (
N != 1 &&
N != 2 &&
N != 4 &&
N != 8)
7591 for (
unsigned I = 2;
I < NumElts;
I++) {
7607 switch (
N->getOpcode()) {
7618 return N->getOperand(1).getNode() ==
Op;
7620 switch (
N->getConstantOperandVal(0)) {
7621 case Intrinsic::arm_mve_add_predicated:
7622 case Intrinsic::arm_mve_mul_predicated:
7623 case Intrinsic::arm_mve_qadd_predicated:
7624 case Intrinsic::arm_mve_vhadd:
7625 case Intrinsic::arm_mve_hadd_predicated:
7626 case Intrinsic::arm_mve_vqdmulh:
7627 case Intrinsic::arm_mve_qdmulh_predicated:
7628 case Intrinsic::arm_mve_vqrdmulh:
7629 case Intrinsic::arm_mve_qrdmulh_predicated:
7630 case Intrinsic::arm_mve_vqdmull:
7631 case Intrinsic::arm_mve_vqdmull_predicated:
7633 case Intrinsic::arm_mve_sub_predicated:
7634 case Intrinsic::arm_mve_qsub_predicated:
7635 case Intrinsic::arm_mve_vhsub:
7636 case Intrinsic::arm_mve_hsub_predicated:
7637 return N->getOperand(2).getNode() ==
Op;
7652 EVT VT =
Op.getValueType();
7660 APInt SplatBits, SplatUndef;
7661 unsigned SplatBitSize;
7663 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7670 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7672 [BVN](
const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7673 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7674 : SplatBitSize == 16 ? MVT::v8i16
7678 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7681 if ((
ST->hasNEON() && SplatBitSize <= 64) ||
7682 (
ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7687 SplatBitSize, DAG, dl, VmovVT, VT,
VMOVModImm);
7691 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7695 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7697 NegatedImm, SplatUndef.
getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7701 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7705 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7709 return DAG.
getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7715 if (
ST->hasMVEIntegerOps() &&
7716 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7717 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7718 : SplatBitSize == 16 ? MVT::v8i16
7722 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7735 bool isOnlyLowElement =
true;
7736 bool usesOnlyOneValue =
true;
7737 bool hasDominantValue =
false;
7742 DenseMap<SDValue, unsigned> ValueCounts;
7744 for (
unsigned i = 0; i < NumElts; ++i) {
7749 isOnlyLowElement =
false;
7753 unsigned &
Count = ValueCounts[
V];
7756 if (++
Count > (NumElts / 2)) {
7757 hasDominantValue =
true;
7761 if (ValueCounts.
size() != 1)
7762 usesOnlyOneValue =
false;
7763 if (!
Value.getNode() && !ValueCounts.
empty())
7766 if (ValueCounts.
empty())
7778 if (hasDominantValue && EltSize <= 32) {
7787 ConstantSDNode *constIndex;
7794 if (VT !=
Value->getOperand(0).getValueType()) {
7797 N = DAG.
getNode(ARMISD::VDUPLANE, dl, VT,
7802 N = DAG.
getNode(ARMISD::VDUPLANE, dl, VT,
7807 if (!usesOnlyOneValue) {
7810 for (
unsigned I = 0;
I < NumElts; ++
I) {
7815 Ops.push_back(
Op.getOperand(
I));
7825 assert(FVT == MVT::f32 || FVT == MVT::f16);
7826 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7827 for (
unsigned i = 0; i < NumElts; ++i)
7832 Val = LowerBUILD_VECTOR(Val, DAG, ST);
7836 if (usesOnlyOneValue) {
7839 return DAG.
getNode(ARMISD::VDUP, dl, VT, Val);
7863 if (
ST->hasNEON() && VT.
is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
7883 if (EltSize >= 32) {
7889 for (
unsigned i = 0; i < NumElts; ++i)
7903 for (
unsigned i = 0 ; i < NumElts; ++i) {
7922 EVT VT =
Op.getValueType();
7925 struct ShuffleSourceInfo {
7927 unsigned MinElt = std::numeric_limits<unsigned>::max();
7928 unsigned MaxElt = 0;
7938 int WindowScale = 1;
7940 ShuffleSourceInfo(
SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
7948 for (
unsigned i = 0; i < NumElts; ++i) {
7963 SDValue SourceVec =
V.getOperand(0);
7965 if (Source == Sources.
end())
7969 unsigned EltNo =
V.getConstantOperandVal(1);
7976 if (Sources.
size() > 2)
7982 for (
auto &Source : Sources) {
7983 EVT SrcEltTy =
Source.Vec.getValueType().getVectorElementType();
7984 if (SrcEltTy.
bitsLT(SmallestEltTy))
7985 SmallestEltTy = SrcEltTy;
7987 unsigned ResMultiplier =
7995 for (
auto &Src : Sources) {
7996 EVT SrcVT = Src.ShuffleVec.getValueType();
8000 if (SrcVTSize == VTSize)
8009 if (SrcVTSize < VTSize) {
8010 if (2 * SrcVTSize != VTSize)
8016 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
8020 if (SrcVTSize != 2 * VTSize)
8023 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8028 if (Src.MinElt >= NumSrcElts) {
8033 Src.WindowBase = -NumSrcElts;
8034 }
else if (Src.MaxElt < NumSrcElts) {
8048 Src.ShuffleVec = DAG.
getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8051 Src.WindowBase = -Src.MinElt;
8058 for (
auto &Src : Sources) {
8059 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8060 if (SrcEltTy == SmallestEltTy)
8063 Src.ShuffleVec = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8065 Src.WindowBase *= Src.WindowScale;
8070 for (
auto Src : Sources)
8071 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8079 if (
Entry.isUndef())
8088 EVT OrigEltTy =
Entry.getOperand(0).getValueType().getVectorElementType();
8091 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8095 int *LaneMask = &
Mask[i * ResMultiplier];
8097 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8098 ExtractBase += NumElts * (Src - Sources.begin());
8099 for (
int j = 0;
j < LanesDefined; ++
j)
8100 LaneMask[j] = ExtractBase + j;
8106 assert(Sources.size() <= 2 &&
"Too many sources!");
8109 for (
unsigned i = 0; i < Sources.size(); ++i)
8116 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8138 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8158 unsigned PFIndexes[4];
8159 for (
unsigned i = 0; i != 4; ++i) {
8163 PFIndexes[i] = M[i];
8167 unsigned PFTableIndex =
8168 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8170 unsigned Cost = (PFEntry >> 30);
8176 bool ReverseVEXT, isV_UNDEF;
8177 unsigned Imm, WhichResult;
8180 if (EltSize >= 32 ||
8187 else if (Subtarget->hasNEON() &&
8192 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8195 else if (Subtarget->hasMVEIntegerOps() &&
8199 else if (Subtarget->hasMVEIntegerOps() &&
8213 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8214 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8215 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8218 if (LHSID == (1*9+2)*9+3)
return LHS;
8219 assert(LHSID == ((4*9+5)*9+6)*9+7 &&
"Illegal OP_COPY!");
8233 return DAG.
getNode(ARMISD::VREV64, dl, VT, OpLHS);
8236 return DAG.
getNode(ARMISD::VREV32, dl, VT, OpLHS);
8239 return DAG.
getNode(ARMISD::VREV16, dl, VT, OpLHS);
8244 return DAG.
getNode(ARMISD::VDUPLANE, dl, VT,
8249 return DAG.
getNode(ARMISD::VEXT, dl, VT,
8276 for (
int I : ShuffleMask)
8280 return DAG.
getNode(ARMISD::VTBL1,
DL, MVT::v8i8, V1,
8283 return DAG.
getNode(ARMISD::VTBL2,
DL, MVT::v8i8, V1, V2,
8289 EVT VT =
Op.getValueType();
8291 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8292 "Expect an v8i16/v16i8 type");
8298 std::vector<int> NewMask;
8302 NewMask.push_back(i);
8332 AllZeroes = DAG.
getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8342 if (VT != MVT::v16i1)
8343 RecastV1 = DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8358 EVT VT =
Op.getValueType();
8362 assert(ST->hasMVEIntegerOps() &&
8363 "No support for vector shuffle of boolean predicates");
8373 return DAG.
getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8389 "Expected identical vector type in expanded i1 shuffle!");
8393 PredAsVector2, ShuffleMask);
8398 if (VT == MVT::v2i1) {
8399 SDValue BC = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8402 return DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8404 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8415 EVT VT =
Op.getValueType();
8419 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8420 "Unexpected vector type");
8422 int QuarterSize = NumElts / 4;
8431 for (
int i = 0; i <
Length; i++) {
8432 if (ShuffleMask[Start + i] >= 0) {
8433 if (ShuffleMask[Start + i] %
Length != i)
8435 MovIdx = ShuffleMask[Start + i] /
Length;
8443 for (
int i = 1; i <
Length; i++) {
8444 if (ShuffleMask[Start + i] >= 0 &&
8445 (ShuffleMask[Start + i] /
Length != MovIdx ||
8446 ShuffleMask[Start + i] %
Length != i))
8452 for (
int Part = 0; Part < 4; ++Part) {
8454 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8468 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8473 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8475 for (
int Part = 0; Part < 4; ++Part)
8476 for (
int i = 0; i < QuarterSize; i++)
8478 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8480 VT, dl,
Op->getOperand(0),
Op->getOperand(1), NewShuffleMask);
8483 for (
int Part = 0; Part < 4; ++Part)
8499 EVT VT =
Op.getValueType();
8511 for (
int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8515 if (Mask[i] != i + BaseOffset) {
8516 if (OffElement == -1)
8522 return NonUndef > 2 && OffElement != -1;
8526 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8528 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8539 ShuffleMask[OffElement] < (
int)NumElts ? V1 : V2,
8550 EVT VT =
Op.getValueType();
8554 if (ST->hasMVEIntegerOps() && EltSize == 1)
8565 if (EltSize <= 32) {
8569 if (Lane == -1) Lane = 0;
8580 bool IsScalarToVector =
true;
8583 IsScalarToVector =
false;
8586 if (IsScalarToVector)
8589 return DAG.
getNode(ARMISD::VDUPLANE, dl, VT, V1,
8593 bool ReverseVEXT =
false;
8595 if (ST->hasNEON() &&
isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8598 return DAG.
getNode(ARMISD::VEXT, dl, VT, V1, V2,
8603 return DAG.
getNode(ARMISD::VREV64, dl, VT, V1);
8605 return DAG.
getNode(ARMISD::VREV32, dl, VT, V1);
8607 return DAG.
getNode(ARMISD::VREV16, dl, VT, V1);
8610 return DAG.
getNode(ARMISD::VEXT, dl, VT, V1, V1,
8619 unsigned WhichResult = 0;
8620 bool isV_UNDEF =
false;
8621 if (ST->hasNEON()) {
8623 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8630 if (ST->hasMVEIntegerOps()) {
8632 return DAG.
getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8635 return DAG.
getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8638 return DAG.
getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8665 }) &&
"Unexpected shuffle index into UNDEF operand!");
8668 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8671 assert((WhichResult == 0) &&
8672 "In-place shuffle of concat can only have one result!");
8681 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8685 for (
bool Top : {
false,
true}) {
8686 for (
bool SingleSource : {
false,
true}) {
8687 if (
isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8692 SingleSource ? V1 : V2);
8708 unsigned PFIndexes[4];
8709 for (
unsigned i = 0; i != 4; ++i) {
8710 if (ShuffleMask[i] < 0)
8713 PFIndexes[i] = ShuffleMask[i];
8717 unsigned PFTableIndex =
8718 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8720 unsigned Cost = (PFEntry >> 30);
8726 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8727 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8737 if (EltSize >= 32) {
8745 for (
unsigned i = 0; i < NumElts; ++i) {
8746 if (ShuffleMask[i] < 0)
8750 ShuffleMask[i] < (
int)NumElts ? V1 : V2,
8758 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8762 if (ST->hasNEON() && VT == MVT::v8i8)
8766 if (ST->hasMVEIntegerOps())
8775 EVT VecVT =
Op.getOperand(0).getValueType();
8778 assert(ST->hasMVEIntegerOps() &&
8779 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8782 DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32,
Op->getOperand(0));
8783 unsigned Lane =
Op.getConstantOperandVal(2);
8784 unsigned LaneWidth =
8786 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8791 return DAG.
getNode(ARMISD::PREDICATE_CAST, dl,
Op.getValueType(), BFI);
8804 if (Subtarget->hasMVEIntegerOps() &&
8805 Op.getValueType().getScalarSizeInBits() == 1)
8829 IVecIn, IElt, Lane);
8838 EVT VecVT =
Op.getOperand(0).getValueType();
8841 assert(ST->hasMVEIntegerOps() &&
8842 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8845 DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32,
Op->getOperand(0));
8846 unsigned Lane =
Op.getConstantOperandVal(1);
8847 unsigned LaneWidth =
8869 return DAG.
getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8878 assert(
Op.getValueType().getScalarSizeInBits() == 1 &&
8879 "Unexpected custom CONCAT_VECTORS lowering");
8881 "Unexpected custom CONCAT_VECTORS lowering");
8882 assert(ST->hasMVEIntegerOps() &&
8883 "CONCAT_VECTORS lowering only supported for MVE");
8887 EVT Op2VT = V2.getValueType();
8888 assert(Op1VT == Op2VT &&
"Operand types don't match!");
8889 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
8890 "Unexpected i1 concat operations!");
8903 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
8908 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8917 auto ExtractInto = [&DAG, &dl](
SDValue NewV,
SDValue ConVec,
unsigned &j) {
8918 EVT NewVT = NewV.getValueType();
8919 EVT ConcatVT = ConVec.getValueType();
8920 unsigned ExtScale = 1;
8921 if (NewVT == MVT::v2f64) {
8922 NewV = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
8935 ConVec = ExtractInto(NewV1, ConVec, j);
8936 ConVec = ExtractInto(NewV2, ConVec, j);
8940 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8946 while (ConcatOps.
size() > 1) {
8947 for (
unsigned I = 0,
E = ConcatOps.
size();
I !=
E;
I += 2) {
8950 ConcatOps[
I / 2] = ConcatPair(V1, V2);
8954 return ConcatOps[0];
8959 EVT VT =
Op->getValueType(0);
8965 assert(
Op.getValueType().is128BitVector() &&
Op.getNumOperands() == 2 &&
8966 "unexpected CONCAT_VECTORS");
8987 EVT VT =
Op.getValueType();
8993 "Unexpected custom EXTRACT_SUBVECTOR lowering");
8994 assert(ST->hasMVEIntegerOps() &&
8995 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9005 EVT SubVT = MVT::v4i32;
9007 for (
unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9017 return DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9022 for (
unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9031 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9038 assert(ST->hasMVEIntegerOps() &&
"Expected MVE!");
9039 EVT VT =
N->getValueType(0);
9040 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9041 "Expected a vector i1 type!");
9043 EVT FromVT =
Op.getValueType();
9054 if (!Subtarget->hasMVEIntegerOps())
9057 EVT ToVT =
N->getValueType(0);
9100 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9102 EVT FromVT =
N->getOperand(0).getValueType();
9103 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9114 if (!Subtarget->hasMVEIntegerOps())
9119 EVT ToVT =
N->getValueType(0);
9120 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9123 EVT FromVT =
Op.getValueType();
9124 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9138 Ext = DAG.
getNode(
N->getOpcode(),
DL, MVT::v8i32, Ext);
9139 Ext1 = DAG.
getNode(
N->getOpcode(),
DL, MVT::v8i32, Ext1);
9151 EVT VT =
N->getValueType(0);
9153 SDNode *BVN =
N->getOperand(0).getNode();
9158 unsigned HiElt = 1 - LoElt;
9163 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9179 for (
unsigned i = 0, e =
N->getNumOperands(); i != e; ++i) {
9180 SDNode *Elt =
N->getOperand(i).getNode();
9183 unsigned HalfSize = EltSize / 2;
9185 if (!
isIntN(HalfSize,
C->getSExtValue()))
9188 if (!
isUIntN(HalfSize,
C->getZExtValue()))
9227 switch (OrigSimpleTy) {
9243 unsigned ExtOpcode) {
9266 if (ExtendedTy == LD->getMemoryVT())
9267 return DAG.
getLoad(LD->getMemoryVT(),
SDLoc(LD), LD->getChain(),
9268 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9269 LD->getMemOperand()->getFlags());
9275 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9276 LD->getMemoryVT(), LD->getAlign(),
9277 LD->getMemOperand()->getFlags());
9290 N->getOperand(0)->getValueType(0),
9296 "Expected extending load");
9302 DAG.
getNode(Opcode,
SDLoc(newLoad), LD->getValueType(0), newLoad);
9311 SDNode *BVN =
N->getOperand(0).getNode();
9313 BVN->
getValueType(0) == MVT::v4i32 &&
"expected v4i32 BUILD_VECTOR");
9321 EVT VT =
N->getValueType(0);
9327 for (
unsigned i = 0; i != NumElts; ++i) {
9328 const APInt &CInt =
N->getConstantOperandAPInt(i);
9337 unsigned Opcode =
N->getOpcode();
9339 SDNode *N0 =
N->getOperand(0).getNode();
9340 SDNode *N1 =
N->getOperand(1).getNode();
9348 unsigned Opcode =
N->getOpcode();
9350 SDNode *N0 =
N->getOperand(0).getNode();
9351 SDNode *N1 =
N->getOperand(1).getNode();
9361 EVT VT =
Op.getValueType();
9363 "unexpected type for custom-lowering ISD::MUL");
9364 SDNode *N0 =
Op.getOperand(0).getNode();
9365 SDNode *N1 =
Op.getOperand(1).getNode();
9366 unsigned NewOpc = 0;
9370 if (isN0SExt && isN1SExt)
9371 NewOpc = ARMISD::VMULLs;
9375 if (isN0ZExt && isN1ZExt)
9376 NewOpc = ARMISD::VMULLu;
9377 else if (isN1SExt || isN1ZExt) {
9381 NewOpc = ARMISD::VMULLs;
9384 NewOpc = ARMISD::VMULLu;
9388 NewOpc = ARMISD::VMULLu;
9394 if (VT == MVT::v2i64)
9411 "unexpected types for extended operands to VMULL");
9412 return DAG.
getNode(NewOpc,
DL, VT, Op0, Op1);
9447 DAG.
getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9481 DAG.
getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9484 DAG.
getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9505 EVT VT =
Op.getValueType();
9506 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9507 "unexpected type for custom-lowering ISD::SDIV");
9514 if (VT == MVT::v8i8) {
9542 EVT VT =
Op.getValueType();
9543 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9544 "unexpected type for custom-lowering ISD::UDIV");
9551 if (VT == MVT::v8i8) {
9590 DAG.
getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9593 DAG.
getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9597 DAG.
getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9617 unsigned Opcode,
bool IsSigned) {
9618 EVT VT0 =
Op.getValue(0).getValueType();
9619 EVT VT1 =
Op.getValue(1).getValueType();
9621 bool InvertCarry = Opcode == ARMISD::SUBE;
9641 EVT VT =
Op.getValueType();
9642 assert((VT == MVT::i32 || VT == MVT::i64) &&
9643 "unexpected type for custom lowering DIV");
9649 LC = VT == MVT::i32 ? RTLIB::SDIVREM_I32 : RTLIB::SDIVREM_I64;
9651 LC = VT == MVT::i32 ? RTLIB::UDIVREM_I32 : RTLIB::UDIVREM_I64;
9658 for (
auto AI : {1, 0}) {
9660 Args.emplace_back(Operand,
9677ARMTargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
9685 const bool MinSize =
ST.hasMinSize();
9686 const bool HasDivide =
ST.isThumb() ?
ST.hasDivideInThumbMode()
9687 :
ST.hasDivideInARMMode();
9691 if (
N->getOperand(0).getValueType().isVector())
9696 if (!(MinSize && HasDivide))
9709 if (Divisor.
sgt(128))
9717 assert(
Op.getValueType() == MVT::i32 &&
9718 "unexpected type for custom lowering DIV");
9721 SDValue DBZCHK = DAG.
getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9724 return LowerWindowsDIVLibCall(
Op, DAG,
Signed, DBZCHK);
9730 if (
N->getValueType(0) == MVT::i32)
9731 return DAG.
getNode(ARMISD::WIN__DBZCHK,
DL, MVT::Other, InChain,
Op);
9734 return DAG.
getNode(ARMISD::WIN__DBZCHK,
DL, MVT::Other, InChain,
9738void ARMTargetLowering::ExpandDIV_Windows(
9743 assert(
Op.getValueType() == MVT::i64 &&
9744 "unexpected type for custom lowering DIV");
9759std::pair<SDValue, SDValue>
9760ARMTargetLowering::LowerAEABIUnalignedLoad(
SDValue Op,
9766 EVT MemVT =
LD->getMemoryVT();
9767 if (MemVT != MVT::i32 && MemVT != MVT::i64)
9771 unsigned AS =
LD->getAddressSpace();
9772 Align Alignment =
LD->getAlign();
9774 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
9777 Alignment <= llvm::Align(2)) {
9780 (MemVT == MVT::i32) ? RTLIB::AEABI_UREAD4 : RTLIB::AEABI_UREAD8;
9786 Opts, dl,
LD->getChain());
9811 EVT MemVT =
ST->getMemoryVT();
9812 if (MemVT != MVT::i32 && MemVT != MVT::i64)
9816 unsigned AS =
ST->getAddressSpace();
9817 Align Alignment =
ST->getAlign();
9819 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
9822 Alignment <= llvm::Align(2)) {
9828 if (
ST->isTruncatingStore())
9832 (MemVT == MVT::i32) ? RTLIB::AEABI_UWRITE4 : RTLIB::AEABI_UWRITE8;
9836 makeLibCall(DAG, LC, MVT::isVoid, {StoreVal,
ST->getBasePtr()}, Opts,
9837 dl,
ST->getChain());
9839 return CallResult.second;
9850 EVT MemVT = LD->getMemoryVT();
9851 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9852 MemVT == MVT::v16i1) &&
9853 "Expected a predicate type!");
9854 assert(MemVT ==
Op.getValueType());
9856 "Expected a non-extending load");
9857 assert(LD->isUnindexed() &&
"Expected a unindexed load");
9871 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9873 LD->getMemOperand());
9879 SDValue Pred = DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
9880 if (MemVT != MVT::v16i1)
9889 EVT MemVT =
LD->getMemoryVT();
9891 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9892 !Subtarget->isThumb1Only() &&
LD->isVolatile() &&
9893 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
9894 assert(
LD->isUnindexed() &&
"Loads should be unindexed at this point.");
9897 ARMISD::LDRD, dl, DAG.
getVTList({MVT::i32, MVT::i32, MVT::Other}),
9898 {LD->getChain(), LD->getBasePtr()}, MemVT,
LD->getMemOperand());
9903 }
else if (MemVT == MVT::i32 || MemVT == MVT::i64) {
9904 auto Pair = LowerAEABIUnalignedLoad(
SDValue(
N, 0), DAG);
9906 Results.push_back(Pair.first);
9907 Results.push_back(Pair.second);
9914 EVT MemVT = ST->getMemoryVT();
9915 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9916 MemVT == MVT::v16i1) &&
9917 "Expected a predicate type!");
9918 assert(MemVT == ST->getValue().getValueType());
9919 assert(!ST->isTruncatingStore() &&
"Expected a non-extending store");
9920 assert(ST->isUnindexed() &&
"Expected a unindexed store");
9925 SDValue Build = ST->getValue();
9926 if (MemVT != MVT::v16i1) {
9939 SDValue GRP = DAG.
getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
9945 ST->getChain(), dl, GRP, ST->getBasePtr(),
9947 ST->getMemOperand());
9953 EVT MemVT =
ST->getMemoryVT();
9955 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9956 !Subtarget->isThumb1Only() &&
ST->isVolatile() &&
9957 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
9958 assert(
ST->isUnindexed() &&
"Stores should be unindexed at this point.");
9959 SDNode *
N =
Op.getNode();
9972 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
9973 MemVT,
ST->getMemOperand());
9974 }
else if (Subtarget->hasMVEIntegerOps() &&
9975 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9976 MemVT == MVT::v16i1))) {
9978 }
else if (MemVT == MVT::i32 || MemVT == MVT::i64) {
9979 return LowerAEABIUnalignedStore(
Op, DAG);
9986 (
N->getOpcode() == ARMISD::VMOVIMM &&
9992 MVT VT =
Op.getSimpleValueType();
9994 SDValue PassThru =
N->getPassThru();
10005 VT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(), Mask, ZeroVec,
10006 N->getMemoryVT(),
N->getMemOperand(),
N->getAddressingMode(),
10007 N->getExtensionType(),
N->isExpandingLoad());
10010 PassThru.
getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10012 if (!PassThru.
isUndef() && !PassThruIsCastZero)
10019 if (!ST->hasMVEIntegerOps())
10023 unsigned BaseOpcode = 0;
10024 switch (
Op->getOpcode()) {
10040 unsigned NumActiveLanes = NumElts;
10042 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10043 NumActiveLanes == 2) &&
10044 "Only expected a power 2 vector size");
10048 while (NumActiveLanes > 4) {
10049 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10051 Op0 = DAG.
getNode(BaseOpcode, dl, VT, Op0, Rev);
10052 NumActiveLanes /= 2;
10056 if (NumActiveLanes == 4) {
10066 SDValue Res0 = DAG.
getNode(BaseOpcode, dl, EltVT, Ext0, Ext1,
Op->getFlags());
10067 SDValue Res1 = DAG.
getNode(BaseOpcode, dl, EltVT, Ext2, Ext3,
Op->getFlags());
10068 Res = DAG.
getNode(BaseOpcode, dl, EltVT, Res0, Res1,
Op->getFlags());
10074 Res = DAG.
getNode(BaseOpcode, dl, EltVT, Ext0, Ext1,
Op->getFlags());
10078 if (EltVT !=
Op->getValueType(0))
10085 if (!ST->hasMVEFloatOps())
10092 if (!ST->hasNEON())
10100 unsigned PairwiseIntrinsic = 0;
10101 switch (
Op->getOpcode()) {
10105 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10108 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10111 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10114 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10120 unsigned NumActiveLanes = NumElts;
10122 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10123 NumActiveLanes == 2) &&
10124 "Only expected a power 2 vector size");
10130 VT =
Lo.getValueType();
10132 NumActiveLanes /= 2;
10136 while (NumActiveLanes > 1) {
10138 NumActiveLanes /= 2;
10145 if (EltVT !=
Op.getValueType()) {
10146 unsigned Extend = 0;
10147 switch (
Op->getOpcode()) {
10159 Res = DAG.
getNode(Extend, dl,
Op.getValueType(), Res);
10204 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10210 SDLoc dl(V.getNode());
10211 auto [VLo, VHi] = DAG.
SplitScalar(V, dl, MVT::i32, MVT::i32);
10221 assert(
N->getValueType(0) == MVT::i64 &&
10222 "AtomicCmpSwap on types less than 64 should be legal");
10231 ARM::CMP_SWAP_64,
SDLoc(
N),
10232 DAG.
getVTList(MVT::Untyped, MVT::Untyped, MVT::Other),
Ops);
10251 EVT VT =
Op.getValueType();
10260 if (isUnsupportedFloatingType(
LHS.getValueType())) {
10262 Chain, IsSignaling);
10263 if (!
RHS.getNode()) {
10279 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10281 ARMcc = DAG.
getConstant(CondCode2, dl, MVT::i32);
10282 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10299 MVT SVT =
Op.getOperand(0).getSimpleValueType();
10302 makeLibCall(DAG, LC, MVT::f32,
Op.getOperand(0), CallOptions,
DL).first;
10315 if (!IsSigned && Subtarget->isThumb1Only()) {
10333 Sub1Result, Sub1Result, Flags1);
10348 if (
Op.getValueType() != MVT::i32)
10362 unsigned Opcode = ARMISD::SUBC;
10371 bool CanUseAdd =
false;
10387 Opcode = ARMISD::ADDC;
10411 SDValue Result1 = DAG.
getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
10412 GTCondValue, Flags);
10416 SDValue Result2 = DAG.
getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
10417 LTCondValue, Flags);
10419 if (
Op.getValueType() != MVT::i32)
10427 switch (
Op.getOpcode()) {
10459 case ISD::BITCAST:
return ExpandBITCAST(
Op.getNode(), DAG, Subtarget);
10463 case ISD::SREM:
return LowerREM(
Op.getNode(), DAG);
10464 case ISD::UREM:
return LowerREM(
Op.getNode(), DAG);
10486 return LowerSET_FPMODE(
Op, DAG);
10488 return LowerRESET_FPMODE(
Op, DAG);
10492 !
Op.getValueType().isVector())
10493 return LowerDIV_Windows(
Op, DAG,
true);
10497 !
Op.getValueType().isVector())
10498 return LowerDIV_Windows(
Op, DAG,
false);
10514 return LowerALUO(
Op, DAG);
10522 EVT MemVT = LD->getMemoryVT();
10523 if (Subtarget->hasMVEIntegerOps() &&
10524 (MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10525 MemVT == MVT::v16i1))
10528 auto Pair = LowerAEABIUnalignedLoad(
Op, DAG);
10534 return LowerSTORE(
Op, DAG, Subtarget);
10559 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
10568 return LowerSPONENTRY(
Op, DAG);
10570 return LowerFP_TO_BF16(
Op, DAG);
10571 case ARMISD::WIN__DBZCHK:
return SDValue();
10574 return LowerCMP(
Op, DAG);
10576 return LowerABS(
Op, DAG);
10581 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
10582 Op.getOperand(1).getValueType() == MVT::bf16) &&
10583 "Expected custom lowering of rounding operations only for f16");
10586 {
Op.getOperand(0),
Op.getOperand(1)});
10587 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
10588 {Ext.getValue(1), Ext.getValue(0)});
10595 unsigned IntNo =
N->getConstantOperandVal(0);
10597 if (IntNo == Intrinsic::arm_smlald)
10598 Opc = ARMISD::SMLALD;
10599 else if (IntNo == Intrinsic::arm_smlaldx)
10600 Opc = ARMISD::SMLALDX;
10601 else if (IntNo == Intrinsic::arm_smlsld)
10602 Opc = ARMISD::SMLSLD;
10603 else if (IntNo == Intrinsic::arm_smlsldx)
10604 Opc = ARMISD::SMLSLDX;
10610 std::tie(
Lo,
Hi) = DAG.
SplitScalar(
N->getOperand(3), dl, MVT::i32, MVT::i32);
10614 N->getOperand(1),
N->getOperand(2),
10626 switch (
N->getOpcode()) {
10633 Res = ExpandBITCAST(
N, DAG, Subtarget);
10642 Res = LowerREM(
N, DAG);
10646 Res = LowerDivRem(
SDValue(
N, 0), DAG);
10663 "can only expand DIV on Windows");
10675 Res = LowerAEABIUnalignedStore(
SDValue(
N, 0), DAG);
10704 "ROPI/RWPI not currently supported with SjLj");
10713 bool isThumb = Subtarget->isThumb();
10714 bool isThumb2 = Subtarget->
isThumb2();
10717 unsigned PCAdj = (
isThumb || isThumb2) ? 4 : 8;
10723 : &ARM::GPRRegClass;
10829 const TargetInstrInfo *
TII = Subtarget->getInstrInfo();
10832 MachineRegisterInfo *MRI = &MF->
getRegInfo();
10836 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10837 : &ARM::GPRnopcRegClass;
10841 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10842 unsigned MaxCSNum = 0;
10843 for (MachineBasicBlock &BB : *MF) {
10849 for (MachineInstr &
II : BB) {
10850 if (!
II.isEHLabel())
10853 MCSymbol *Sym =
II.getOperand(0).getMCSymbol();
10854 if (!MF->hasCallSiteLandingPad(Sym))
continue;
10856 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10857 for (
unsigned Idx : CallSiteIdxs) {
10858 CallSiteNumToLPad[Idx].push_back(&BB);
10859 MaxCSNum = std::max(MaxCSNum, Idx);
10866 std::vector<MachineBasicBlock*> LPadList;
10867 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10868 LPadList.reserve(CallSiteNumToLPad.
size());
10869 for (
unsigned I = 1;
I <= MaxCSNum; ++
I) {
10870 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[
I];
10871 for (MachineBasicBlock *
MBB : MBBList) {
10872 LPadList.push_back(
MBB);
10877 assert(!LPadList.empty() &&
10878 "No landing pad destinations for the dispatch jump table!");
10881 MachineJumpTableInfo *JTI =
10888 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10891 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10893 BuildMI(TrapBB, dl,
TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
10896 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10900 MF->insert(MF->end(), DispatchBB);
10901 MF->insert(MF->end(), DispContBB);
10902 MF->insert(MF->end(), TrapBB);
10906 SetupEntryBlockForSjLj(
MI,
MBB, DispatchBB, FI);
10908 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10912 MachineInstrBuilder MIB;
10913 MIB =
BuildMI(DispatchBB, dl,
TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10915 const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*
>(
TII);
10925 unsigned NumLPads = LPadList.size();
10926 if (Subtarget->isThumb2()) {
10928 BuildMI(DispatchBB, dl,
TII->get(ARM::t2LDRi12), NewVReg1)
10934 if (NumLPads < 256) {
10935 BuildMI(DispatchBB, dl,
TII->get(ARM::t2CMPri))
10937 .
addImm(LPadList.size())
10941 BuildMI(DispatchBB, dl,
TII->get(ARM::t2MOVi16), VReg1)
10942 .
addImm(NumLPads & 0xFFFF)
10945 unsigned VReg2 = VReg1;
10946 if ((NumLPads & 0xFFFF0000) != 0) {
10948 BuildMI(DispatchBB, dl,
TII->get(ARM::t2MOVTi16), VReg2)
10954 BuildMI(DispatchBB, dl,
TII->get(ARM::t2CMPrr))
10960 BuildMI(DispatchBB, dl,
TII->get(ARM::t2Bcc))
10966 BuildMI(DispContBB, dl,
TII->get(ARM::t2LEApcrelJT), NewVReg3)
10971 BuildMI(DispContBB, dl,
TII->get(ARM::t2ADDrs), NewVReg4)
10978 BuildMI(DispContBB, dl,
TII->get(ARM::t2BR_JT))
10982 }
else if (Subtarget->isThumb()) {
10984 BuildMI(DispatchBB, dl,
TII->get(ARM::tLDRspi), NewVReg1)
10990 if (NumLPads < 256) {
10991 BuildMI(DispatchBB, dl,
TII->get(ARM::tCMPi8))
10996 MachineConstantPool *
ConstantPool = MF->getConstantPool();
11001 Align Alignment = MF->getDataLayout().getPrefTypeAlign(
Int32Ty);
11002 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
11005 BuildMI(DispatchBB, dl,
TII->get(ARM::tLDRpci))
11009 BuildMI(DispatchBB, dl,
TII->get(ARM::tCMPr))
11015 BuildMI(DispatchBB, dl,
TII->get(ARM::tBcc))
11021 BuildMI(DispContBB, dl,
TII->get(ARM::tLSLri), NewVReg2)
11028 BuildMI(DispContBB, dl,
TII->get(ARM::tLEApcrelJT), NewVReg3)
11033 BuildMI(DispContBB, dl,
TII->get(ARM::tADDrr), NewVReg4)
11039 MachineMemOperand *JTMMOLd =
11044 BuildMI(DispContBB, dl,
TII->get(ARM::tLDRi), NewVReg5)
11050 unsigned NewVReg6 = NewVReg5;
11051 if (IsPositionIndependent) {
11053 BuildMI(DispContBB, dl,
TII->get(ARM::tADDrr), NewVReg6)
11060 BuildMI(DispContBB, dl,
TII->get(ARM::tBR_JTr))
11065 BuildMI(DispatchBB, dl,
TII->get(ARM::LDRi12), NewVReg1)
11071 if (NumLPads < 256) {
11072 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPri))
11076 }
else if (Subtarget->hasV6T2Ops() &&
isUInt<16>(NumLPads)) {
11078 BuildMI(DispatchBB, dl,
TII->get(ARM::MOVi16), VReg1)
11079 .
addImm(NumLPads & 0xFFFF)
11082 unsigned VReg2 = VReg1;
11083 if ((NumLPads & 0xFFFF0000) != 0) {
11085 BuildMI(DispatchBB, dl,
TII->get(ARM::MOVTi16), VReg2)
11091 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPrr))
11096 MachineConstantPool *
ConstantPool = MF->getConstantPool();
11101 Align Alignment = MF->getDataLayout().getPrefTypeAlign(
Int32Ty);
11102 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
11105 BuildMI(DispatchBB, dl,
TII->get(ARM::LDRcp))
11110 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPrr))
11122 BuildMI(DispContBB, dl,
TII->get(ARM::MOVsi), NewVReg3)
11128 BuildMI(DispContBB, dl,
TII->get(ARM::LEApcrelJT), NewVReg4)
11132 MachineMemOperand *JTMMOLd =
11136 BuildMI(DispContBB, dl,
TII->get(ARM::LDRrs), NewVReg5)
11143 if (IsPositionIndependent) {
11144 BuildMI(DispContBB, dl,
TII->get(ARM::BR_JTadd))
11149 BuildMI(DispContBB, dl,
TII->get(ARM::BR_JTr))
11156 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11157 for (MachineBasicBlock *CurMBB : LPadList) {
11158 if (SeenMBBs.
insert(CurMBB).second)
11165 for (MachineBasicBlock *BB : InvokeBBs) {
11169 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11170 while (!Successors.empty()) {
11171 MachineBasicBlock *SMBB = Successors.pop_back_val();
11173 BB->removeSuccessor(SMBB);
11179 BB->normalizeSuccProbs();
11186 II = BB->rbegin(), IE = BB->rend();
II != IE; ++
II) {
11187 if (!
II->isCall())
continue;
11189 DenseSet<unsigned> DefRegs;
11191 OI =
II->operands_begin(), OE =
II->operands_end();
11193 if (!OI->isReg())
continue;
11194 DefRegs.
insert(OI->getReg());
11197 MachineInstrBuilder MIB(*MF, &*
II);
11199 for (
unsigned i = 0; SavedRegs[i] != 0; ++i) {
11200 unsigned Reg = SavedRegs[i];
11201 if (Subtarget->isThumb2() &&
11202 !ARM::tGPRRegClass.contains(
Reg) &&
11203 !ARM::hGPRRegClass.contains(
Reg))
11205 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(
Reg))
11207 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(
Reg))
11219 for (MachineBasicBlock *MBBLPad : MBBLPads)
11220 MBBLPad->setIsEHPad(
false);
11223 MI.eraseFromParent();
11236static unsigned getLdOpcode(
unsigned LdSize,
bool IsThumb1,
bool IsThumb2) {
11238 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11239 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11241 return LdSize == 4 ? ARM::tLDRi
11242 : LdSize == 2 ? ARM::tLDRHi
11243 : LdSize == 1 ? ARM::tLDRBi : 0;
11245 return LdSize == 4 ? ARM::t2LDR_POST
11246 : LdSize == 2 ? ARM::t2LDRH_POST
11247 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11248 return LdSize == 4 ? ARM::LDR_POST_IMM
11249 : LdSize == 2 ? ARM::LDRH_POST
11250 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11255static unsigned getStOpcode(
unsigned StSize,
bool IsThumb1,
bool IsThumb2) {
11257 return StSize == 16 ? ARM::VST1q32wb_fixed
11258 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11260 return StSize == 4 ? ARM::tSTRi
11261 : StSize == 2 ? ARM::tSTRHi
11262 : StSize == 1 ? ARM::tSTRBi : 0;
11264 return StSize == 4 ? ARM::t2STR_POST
11265 : StSize == 2 ? ARM::t2STRH_POST
11266 : StSize == 1 ? ARM::t2STRB_POST : 0;
11267 return StSize == 4 ? ARM::STR_POST_IMM
11268 : StSize == 2 ? ARM::STRH_POST
11269 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11276 unsigned LdSize,
unsigned Data,
unsigned AddrIn,
11277 unsigned AddrOut,
bool IsThumb1,
bool IsThumb2) {
11278 unsigned LdOpc =
getLdOpcode(LdSize, IsThumb1, IsThumb2);
11279 assert(LdOpc != 0 &&
"Should have a load opcode");
11286 }
else if (IsThumb1) {
11292 BuildMI(*BB, Pos, dl,
TII->get(ARM::tADDi8), AddrOut)
11297 }
else if (IsThumb2) {
11317 unsigned StSize,
unsigned Data,
unsigned AddrIn,
11318 unsigned AddrOut,
bool IsThumb1,
bool IsThumb2) {
11319 unsigned StOpc =
getStOpcode(StSize, IsThumb1, IsThumb2);
11320 assert(StOpc != 0 &&
"Should have a store opcode");
11322 BuildMI(*BB, Pos, dl,
TII->get(StOpc), AddrOut)
11327 }
else if (IsThumb1) {
11334 BuildMI(*BB, Pos, dl,
TII->get(ARM::tADDi8), AddrOut)
11339 }
else if (IsThumb2) {
11340 BuildMI(*BB, Pos, dl,
TII->get(StOpc), AddrOut)
11346 BuildMI(*BB, Pos, dl,
TII->get(StOpc), AddrOut)
11361 const TargetInstrInfo *
TII = Subtarget->getInstrInfo();
11367 unsigned SizeVal =
MI.getOperand(2).getImm();
11368 unsigned Alignment =
MI.getOperand(3).getImm();
11372 MachineRegisterInfo &MRI = MF->
getRegInfo();
11373 unsigned UnitSize = 0;
11374 const TargetRegisterClass *TRC =
nullptr;
11375 const TargetRegisterClass *VecTRC =
nullptr;
11377 bool IsThumb1 = Subtarget->isThumb1Only();
11378 bool IsThumb2 = Subtarget->isThumb2();
11379 bool IsThumb = Subtarget->isThumb();
11381 if (Alignment & 1) {
11383 }
else if (Alignment & 2) {
11388 Subtarget->hasNEON()) {
11389 if ((Alignment % 16 == 0) && SizeVal >= 16)
11391 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11400 bool IsNeon = UnitSize >= 8;
11401 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11403 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11404 : UnitSize == 8 ? &ARM::DPRRegClass
11407 unsigned BytesLeft = SizeVal % UnitSize;
11408 unsigned LoopSize = SizeVal - BytesLeft;
11410 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11414 unsigned srcIn = src;
11415 unsigned destIn = dest;
11416 for (
unsigned i = 0; i < LoopSize; i+=UnitSize) {
11421 IsThumb1, IsThumb2);
11423 IsThumb1, IsThumb2);
11431 for (
unsigned i = 0; i < BytesLeft; i++) {
11436 IsThumb1, IsThumb2);
11438 IsThumb1, IsThumb2);
11442 MI.eraseFromParent();
11468 MF->
insert(It, loopMBB);
11469 MF->
insert(It, exitMBB);
11472 unsigned CallFrameSize =
TII->getCallFrameSizeAt(
MI);
11483 if (Subtarget->useMovt()) {
11484 BuildMI(BB, dl,
TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11487 }
else if (Subtarget->genExecuteOnly()) {
11488 assert(IsThumb &&
"Non-thumb expected to have used movt");
11497 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
11498 MachineMemOperand *CPMMO =
11522 MachineBasicBlock *entryBB = BB;
11537 BuildMI(BB, dl,
TII->get(ARM::PHI), destPhi)
11545 IsThumb1, IsThumb2);
11547 IsThumb1, IsThumb2);
11551 BuildMI(*BB, BB->
end(), dl,
TII->get(ARM::tSUBi8), varLoop)
11557 MachineInstrBuilder MIB =
11559 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11568 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11577 auto StartOfExit = exitMBB->
begin();
11581 unsigned srcIn = srcLoop;
11582 unsigned destIn = destLoop;
11583 for (
unsigned i = 0; i < BytesLeft; i++) {
11587 emitPostLd(BB, StartOfExit,
TII, dl, 1, scratch, srcIn, srcOut,
11588 IsThumb1, IsThumb2);
11589 emitPostSt(BB, StartOfExit,
TII, dl, 1, scratch, destIn, destOut,
11590 IsThumb1, IsThumb2);
11595 MI.eraseFromParent();
11603 const TargetInstrInfo &
TII = *Subtarget->getInstrInfo();
11606 assert(TM.getTargetTriple().isOSWindows() &&
11607 "__chkstk is only supported on Windows");
11608 assert(Subtarget->isThumb2() &&
"Windows on ARM requires Thumb-2 mode");
11628 RTLIB::LibcallImpl ChkStkLibcall =
getLibcallImpl(RTLIB::STACK_PROBE);
11629 if (ChkStkLibcall == RTLIB::Unsupported)
11633 switch (TM.getCodeModel()) {
11675 MI.eraseFromParent();
11684 const TargetInstrInfo *
TII = Subtarget->getInstrInfo();
11699 .
addReg(
MI.getOperand(0).getReg())
11707 MI.eraseFromParent();
11731 if (miI == BB->
end()) {
11733 if (Succ->isLiveIn(ARM::CPSR))
11739 SelectItr->addRegisterKilled(ARM::CPSR,
TRI);
11752 BuildMI(TpEntry, Dl,
TII->get(ARM::t2ADDri), AddDestReg)
11759 BuildMI(TpEntry, Dl,
TII->get(ARM::t2LSRri), LsrDestReg)
11766 BuildMI(TpEntry, Dl,
TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11769 BuildMI(TpEntry, Dl,
TII->get(ARM::t2WhileLoopStart))
11770 .
addUse(TotalIterationsReg)
11777 return TotalIterationsReg;
11788 Register TotalIterationsReg,
bool IsMemcpy) {
11797 BuildMI(TpLoopBody, Dl,
TII->get(ARM::PHI), SrcPhiReg)
11807 BuildMI(TpLoopBody, Dl,
TII->get(ARM::PHI), DestPhiReg)
11815 Register RemainingLoopIterationsReg =
11817 BuildMI(TpLoopBody, Dl,
TII->get(ARM::PHI), LoopCounterPhiReg)
11818 .
addUse(TotalIterationsReg)
11820 .
addUse(RemainingLoopIterationsReg)
11826 BuildMI(TpLoopBody, Dl,
TII->get(ARM::PHI), PredCounterPhiReg)
11827 .
addUse(ElementCountReg)
11829 .
addUse(RemainingElementsReg)
11834 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VCTP8), VccrReg)
11835 .
addUse(PredCounterPhiReg)
11840 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2SUBri), RemainingElementsReg)
11841 .
addUse(PredCounterPhiReg)
11850 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VLDRBU8_post))
11859 SrcValueReg = OpSrcReg;
11861 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VSTRBU8_post))
11872 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11873 .
addUse(LoopCounterPhiReg)
11876 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2LoopEnd))
11877 .
addUse(RemainingLoopIterationsReg)
11895 "Invalid call instruction for a KCFI check");
11898 switch (
MBBI->getOpcode()) {
11901 case ARM::BLX_pred:
11902 case ARM::BLX_noip:
11903 case ARM::BLX_pred_noip:
11905 TargetOp = &
MBBI->getOperand(0);
11907 case ARM::TCRETURNri:
11908 case ARM::TCRETURNrinotr12:
11909 case ARM::TAILJMPr:
11910 case ARM::TAILJMPr4:
11911 TargetOp = &
MBBI->getOperand(0);
11917 case ARM::tBLXr_noip:
11918 case ARM::tBX_CALL:
11919 TargetOp = &
MBBI->getOperand(2);
11922 case ARM::tTAILJMPr:
11923 TargetOp = &
MBBI->getOperand(0);
11929 assert(TargetOp && TargetOp->
isReg() &&
"Invalid target operand");
11933 unsigned KCFICheckOpcode;
11934 if (Subtarget->isThumb()) {
11935 if (Subtarget->isThumb2()) {
11936 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;
11938 KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;
11941 KCFICheckOpcode = ARM::KCFI_CHECK_ARM;
11955 bool isThumb2 = Subtarget->isThumb2();
11956 switch (
MI.getOpcode()) {
11963 case ARM::tLDR_postidx: {
11967 .
add(
MI.getOperand(2))
11968 .
add(
MI.getOperand(3))
11969 .
add(
MI.getOperand(4))
11970 .
add(
MI.getOperand(0))
11972 MI.eraseFromParent();
11976 case ARM::MVE_MEMCPYLOOPINST:
11977 case ARM::MVE_MEMSETLOOPINST: {
12007 Register OpDestReg =
MI.getOperand(0).getReg();
12008 Register OpSrcReg =
MI.getOperand(1).getReg();
12009 Register OpSizeReg =
MI.getOperand(2).getReg();
12029 if (TpExit == BB) {
12031 "block containing memcpy/memset Pseudo");
12041 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg,
TII, dl, MRI);
12044 bool IsMemcpy =
MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12046 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12049 Properties.resetNoPHIs();
12061 MI.eraseFromParent();
12071 case ARM::t2STR_preidx:
12072 MI.setDesc(
TII->get(ARM::t2STR_PRE));
12074 case ARM::t2STRB_preidx:
12075 MI.setDesc(
TII->get(ARM::t2STRB_PRE));
12077 case ARM::t2STRH_preidx:
12078 MI.setDesc(
TII->get(ARM::t2STRH_PRE));
12081 case ARM::STRi_preidx:
12082 case ARM::STRBi_preidx: {
12083 unsigned NewOpc =
MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12084 : ARM::STRB_PRE_IMM;
12086 unsigned Offset =
MI.getOperand(4).getImm();
12094 .
add(
MI.getOperand(0))
12095 .
add(
MI.getOperand(1))
12096 .
add(
MI.getOperand(2))
12098 .
add(
MI.getOperand(5))
12099 .
add(
MI.getOperand(6))
12101 MI.eraseFromParent();
12104 case ARM::STRr_preidx:
12105 case ARM::STRBr_preidx:
12106 case ARM::STRH_preidx: {
12108 switch (
MI.getOpcode()) {
12110 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG;
break;
12111 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG;
break;
12112 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE;
break;
12117 MI.eraseFromParent();
12121 case ARM::tMOVCCr_pseudo: {
12139 F->insert(It, copy0MBB);
12140 F->insert(It, sinkMBB);
12143 unsigned CallFrameSize =
TII->getCallFrameSizeAt(
MI);
12149 if (!
MI.killsRegister(ARM::CPSR,
nullptr) &&
12165 .
addImm(
MI.getOperand(3).getImm())
12166 .
addReg(
MI.getOperand(4).getReg());
12181 .
addReg(
MI.getOperand(1).getReg())
12183 .
addReg(
MI.getOperand(2).getReg())
12186 MI.eraseFromParent();
12191 case ARM::BCCZi64: {
12197 bool RHSisZero =
MI.getOpcode() == ARM::BCCZi64;
12202 BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12206 BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12212 BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12216 BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12226 BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12235 MI.eraseFromParent();
12239 case ARM::Int_eh_sjlj_setjmp:
12240 case ARM::Int_eh_sjlj_setjmp_nofp:
12241 case ARM::tInt_eh_sjlj_setjmp:
12242 case ARM::t2Int_eh_sjlj_setjmp:
12243 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12246 case ARM::Int_eh_sjlj_setup_dispatch:
12247 EmitSjLjDispatchBlock(
MI, BB);
12249 case ARM::COPY_STRUCT_BYVAL_I32:
12251 return EmitStructByval(
MI, BB);
12252 case ARM::WIN__CHKSTK:
12253 return EmitLowered__chkstk(
MI, BB);
12254 case ARM::WIN__DBZCHK:
12255 return EmitLowered__dbzchk(
MI, BB);
12271 if (!
Node->hasAnyUseOfValue(0)) {
12272 MI.getOperand(0).setIsDead(
true);
12274 if (!
Node->hasAnyUseOfValue(1)) {
12275 MI.getOperand(1).setIsDead(
true);
12279 for (
unsigned I = 0;
I !=
MI.getOperand(4).
getImm(); ++
I) {
12281 : &ARM::GPRRegClass);
12288 if (
MI.getOpcode() == ARM::MEMCPY) {
12309 MI.getDesc().getNumOperands() + 5 -
MI.getDesc().getSize()
12310 &&
"converted opcode should be the same except for cc_out"
12311 " (and, on Thumb1, pred)");
12319 if (Subtarget->isThumb1Only()) {
12320 for (
unsigned c =
MCID->getNumOperands() - 4; c--;) {
12321 MI.addOperand(
MI.getOperand(1));
12322 MI.removeOperand(1);
12326 for (
unsigned i =
MI.getNumOperands(); i--;) {
12328 if (
op.isReg() &&
op.isUse()) {
12331 MI.tieOperands(DefIdx, i);
12339 ccOutIdx =
MCID->getNumOperands() - 1;
12341 ccOutIdx =
MCID->getNumOperands() - 1;
12345 if (!
MI.hasOptionalDef() || !
MCID->operands()[ccOutIdx].isOptionalDef()) {
12346 assert(!NewOpc &&
"Optional cc_out operand required");
12351 bool definesCPSR =
false;
12352 bool deadCPSR =
false;
12353 for (
unsigned i =
MCID->getNumOperands(), e =
MI.getNumOperands(); i != e;
12357 definesCPSR =
true;
12360 MI.removeOperand(i);
12364 if (!definesCPSR) {
12365 assert(!NewOpc &&
"Optional cc_out operand required");
12368 assert(deadCPSR == !
Node->hasAnyUseOfValue(1) &&
"inconsistent dead flag");
12370 assert(!
MI.getOperand(ccOutIdx).getReg() &&
12371 "expect uninitialized optional cc_out operand");
12373 if (!Subtarget->isThumb1Only())
12409 switch (
N->getOpcode()) {
12410 default:
return false;
12412 CC =
N->getOperand(0);
12434 EVT VT =
N->getValueType(0);
12435 CC =
N->getOperand(0);
12482 EVT VT =
N->getValueType(0);
12485 bool SwapSelectOps;
12487 NonConstantVal, DAG))
12493 OtherOp, NonConstantVal);
12499 CCOp, TrueVal, FalseVal);
12519 if (
N->getOpcode() == ARMISD::VUZP)
12523 if (
N->getOpcode() == ARMISD::VTRN &&
N->getValueType(0) == MVT::v2i32)
12538 if (!
N->getValueType(0).is64BitVector())
12546 EVT VT =
N->getValueType(0);
12585 EVT VT =
N->getValueType(0);
12591 Opcode = Intrinsic::arm_neon_vpaddls;
12593 Opcode = Intrinsic::arm_neon_vpaddlu;
12621 EVT VT =
N->getValueType(0);
12636 unsigned nextIndex = 0;
12687 Ops.push_back(Vec);
12704 return DAG.
getNode(ExtOp, dl, VT, tmp);
12735 if (SRA.getOpcode() !=
ISD::SRA) {
12742 if (Const->getZExtValue() != 31)
12747 if (SRA.getOperand(0) !=
Mul)
12751 SDLoc dl(AddcNode);
12752 unsigned Opcode = 0;
12757 Opcode = ARMISD::SMLALBB;
12758 Op0 =
Mul.getOperand(0);
12759 Op1 =
Mul.getOperand(1);
12761 Opcode = ARMISD::SMLALBT;
12762 Op0 =
Mul.getOperand(0);
12763 Op1 =
Mul.getOperand(1).getOperand(0);
12765 Opcode = ARMISD::SMLALTB;
12766 Op0 =
Mul.getOperand(0).getOperand(0);
12767 Op1 =
Mul.getOperand(1);
12769 Opcode = ARMISD::SMLALTT;
12770 Op0 =
Mul->getOperand(0).getOperand(0);
12771 Op1 =
Mul->getOperand(1).getOperand(0);
12787 SDValue resNode(AddcNode, 0);
12815 AddeSubeNode->
getOpcode() == ARMISD::SUBE) &&
12816 "Expect an ADDE or SUBE");
12820 "ADDE node has the wrong inputs");
12824 if ((AddeSubeNode->
getOpcode() == ARMISD::ADDE &&
12825 AddcSubcNode->
getOpcode() != ARMISD::ADDC) ||
12826 (AddeSubeNode->
getOpcode() == ARMISD::SUBE &&
12827 AddcSubcNode->
getOpcode() != ARMISD::SUBC))
12839 "Expect ADDC with two result values. First: i32");
12843 if (AddeSubeNode->
getOpcode() == ARMISD::ADDE &&
12859 bool IsLeftOperandMUL =
false;
12864 IsLeftOperandMUL =
true;
12875 SDValue *LowAddSub =
nullptr;
12878 if ((AddeSubeOp0 != MULOp.
getValue(1)) && (AddeSubeOp1 != MULOp.
getValue(1)))
12881 if (IsLeftOperandMUL)
12882 HiAddSub = &AddeSubeOp1;
12884 HiAddSub = &AddeSubeOp0;
12889 if (AddcSubcOp0 == MULOp.
getValue(0)) {
12890 LoMul = &AddcSubcOp0;
12891 LowAddSub = &AddcSubcOp1;
12893 if (AddcSubcOp1 == MULOp.
getValue(0)) {
12894 LoMul = &AddcSubcOp1;
12895 LowAddSub = &AddcSubcOp0;
12903 if (AddcSubcNode == HiAddSub->getNode() ||
12919 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->
useMulOps() &&
12924 Ops.push_back(*HiAddSub);
12925 if (AddcSubcNode->
getOpcode() == ARMISD::SUBC) {
12926 FinalOpc = ARMISD::SMMLSR;
12928 FinalOpc = ARMISD::SMMLAR;
12933 return SDValue(AddeSubeNode, 0);
12934 }
else if (AddcSubcNode->
getOpcode() == ARMISD::SUBC)
12940 Ops.push_back(*LowAddSub);
12941 Ops.push_back(*HiAddSub);
12954 return SDValue(AddeSubeNode, 0);
12966 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12971 if (AddcNode->
getOpcode() != ARMISD::ADDC)
12975 SDNode *UmlalNode =
nullptr;
13014 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13019 SDNode* AddcNode =
N->getOperand(2).getNode();
13020 SDNode* AddeNode =
N->getOperand(3).getNode();
13021 if ((AddcNode->
getOpcode() == ARMISD::ADDC) &&
13022 (AddeNode->
getOpcode() == ARMISD::ADDE) &&
13028 {N->getOperand(0), N->getOperand(1),
13029 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13039 if (
N->getOpcode() == ARMISD::SUBC &&
N->hasAnyUseOfValue(1)) {
13043 if (
LHS->getOpcode() == ARMISD::ADDE &&
13053 int32_t imm =
C->getSExtValue();
13054 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13057 unsigned Opcode = (
N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13059 return DAG.
getNode(Opcode,
DL,
N->getVTList(),
N->getOperand(0),
RHS);
13074 int64_t imm =
C->getSExtValue();
13083 unsigned Opcode = (
N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13085 return DAG.
getNode(Opcode,
DL,
N->getVTList(),
13086 N->getOperand(0),
RHS,
N->getOperand(2));
13098 if (!Subtarget->hasMVEIntegerOps())
13111 SetCC =
N->getOperand(0);
13115 TrueVal =
N->getOperand(1);
13116 FalseVal =
N->getOperand(2);
13118 LHS =
N->getOperand(0);
13119 RHS =
N->getOperand(1);
13121 TrueVal =
N->getOperand(2);
13122 FalseVal =
N->getOperand(3);
13127 unsigned int Opcode = 0;
13131 Opcode = ARMISD::VMINVu;
13137 Opcode = ARMISD::VMINVs;
13143 Opcode = ARMISD::VMAXVu;
13149 Opcode = ARMISD::VMAXVs;
13156 switch (TrueVal->getOpcode()) {
13175 if (TrueVal !=
LHS || FalseVal !=
RHS)
13178 EVT LeftType =
LHS->getValueType(0);
13179 EVT RightType =
RHS->getValueType(0);
13182 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13186 if (VectorScalarType != MVT::i32)
13194 if (VectorScalarType != MVT::i32)
13207 EVT VT =
N->getValueType(0);
13215 Shft =
N->getOperand(0);
13222 Cmp.getOperand(0) !=
N->getOperand(1) ||
13223 Cmp.getOperand(1) !=
N->getOperand(2))
13225 Shft =
N->getOperand(1);
13237 ScalarType = MVT::i8;
13240 case (1 << 15) - 1:
13241 ScalarType = MVT::i16;
13244 case (1ULL << 31) - 1:
13245 ScalarType = MVT::i32;
13276 unsigned LegalLanes = 128 / (ShftAmt + 1);
13288 Inp0 = DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, LegalVecVT, Inp0);
13289 Inp1 = DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, LegalVecVT, Inp1);
13290 SDValue VQDMULH = DAG.
getNode(ARMISD::VQDMULH,
DL, LegalVecVT, Inp0, Inp1);
13291 SDValue Trunc = DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, ExtVecVT, VQDMULH);
13300 for (
unsigned I = 0;
I < NumParts; ++
I) {
13307 SDValue VQDMULH = DAG.
getNode(ARMISD::VQDMULH,
DL, LegalVecVT, Inp0, Inp1);
13317 if (!Subtarget->hasMVEIntegerOps())
13322 if (
N->getOperand(0).getOpcode() == ARMISD::PREDICATE_CAST &&
13324 unsigned C =
N->getOperand(0).getConstantOperandVal(0);
13326 return N->getOperand(2);
13328 return N->getOperand(1);
13343 if (
N->getOperand(0).getOpcode() !=
ISD::XOR)
13353 if (!Const || !Const->isOne())
13371 EVT VT =
N->getValueType(0);
13373 if (!Subtarget->hasMVEIntegerOps() ||
13402 Opc = Intrinsic::arm_mve_vctp64;
13405 Opc = Intrinsic::arm_mve_vctp32;
13408 Opc = Intrinsic::arm_mve_vctp16;
13411 Opc = Intrinsic::arm_mve_vctp8;
13465 EVT VT =
N->getValueType(0);
13471 switch (
Op.getOpcode()) {
13473 case ARMISD::VADDVs:
13474 case ARMISD::VADDVu:
13475 case ARMISD::VMLAVs:
13476 case ARMISD::VMLAVu:
13496 unsigned N0RedOp = 0;
13503 unsigned N1RedOp = 0;
13517 if (
SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13519 if (
SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13526 auto DistrubuteVecReduceLoad = [&](
SDValue N0,
SDValue N1,
bool IsForward) {
13550 if (!BaseLocDecomp0.getBase() ||
13551 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13552 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13554 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13556 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13566 if (IsBefore < 0) {
13569 }
else if (IsBefore > 0) {
13582 }
else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13592 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13602 if (
SDValue R = DistrubuteVecReduceLoad(N0, N1,
true))
13604 if (
SDValue R = DistrubuteVecReduceLoad(N1, N0,
false))
13611 if (!Subtarget->hasMVEIntegerOps())
13617 EVT VT =
N->getValueType(0);
13622 if (VT != MVT::i64)
13633 auto MakeVecReduce = [&](
unsigned Opcode,
unsigned OpcodeA,
SDValue NA,
13653 unsigned S = VecRed->
getOpcode() == OpcodeA ? 2 : 0;
13662 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13664 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13666 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13668 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13670 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13672 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13674 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13676 if (
SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13678 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13680 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13682 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13684 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13686 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13688 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13690 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13692 if (
SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13702 "Expected shift op");
13704 SDValue ShiftLHS =
N->getOperand(0);
13718 if (Subtarget->isThumb1Only()) {
13729 if (Const->getAPIntValue().ult(256))
13732 Const->getAPIntValue().sgt(-256))
13748 (
N->getOperand(0).getOpcode() ==
ISD::SHL ||
13749 N->getOperand(0).getOpcode() ==
ISD::SRL) &&
13750 "Expected XOR(SHIFT) pattern");
13755 if (XorC && ShiftC) {
13756 unsigned MaskIdx, MaskLen;
13757 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13758 unsigned ShiftAmt = ShiftC->getZExtValue();
13759 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
13760 if (
N->getOperand(0).getOpcode() ==
ISD::SHL)
13761 return MaskIdx == ShiftAmt && MaskLen == (
BitWidth - ShiftAmt);
13762 return MaskIdx == 0 && MaskLen == (
BitWidth - ShiftAmt);
13772 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
13774 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
13775 "Expected shift-shift mask");
13777 if (!Subtarget->isThumb1Only())
13780 EVT VT =
N->getValueType(0);
13788 unsigned BinOpcode,
EVT VT,
unsigned SelectOpcode,
SDValue X,
13790 return Subtarget->hasMVEIntegerOps() &&
isTypeLegal(VT) &&
13795 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps()) {
13796 if (Subtarget->isThumb1Only())
13810 return Subtarget->hasVFP2Base();
13812 return Subtarget->hasVFP2Base();
13814 return Subtarget->hasFP64();
13817 return Subtarget->hasMVEFloatOps();
13846 if (ST->isThumb() && ST->isThumb1Only())
13850 for (
auto *U :
N->users()) {
13851 switch(U->getOpcode()) {
13869 if (U->getOperand(0).getOpcode() ==
ISD::SHL ||
13870 U->getOperand(1).getOpcode() ==
ISD::SHL)
13880 if (
N->getOperand(0).getOpcode() !=
ISD::SHL)
13887 if (!C1ShlC2 || !C2)
13890 APInt C2Int = C2->getAPIntValue();
13891 APInt C1Int = C1ShlC2->getAPIntValue();
13893 if (C2Int.
uge(C2Width))
13899 if ((C1Int & Mask) != C1Int)
13906 auto LargeImm = [](
const APInt &Imm) {
13907 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13908 return Imm.getBitWidth() - Zeros > 8;
13911 if (LargeImm(C1Int) || LargeImm(C2Int))
13923 SHL.dump();
N->dump());
13984 if (
Op.hasOneUse() && ShiftAmt &&
13985 ShiftAmt->
getZExtValue() ==
Op.getValueType().getScalarSizeInBits() - 1)
14043 if (!Subtarget->hasMVEIntegerOps() || !
N->getValueType(0).isVector())
14064 return DCI.
DAG.
getNode(ARMISD::VDUP, dl,
N->getValueType(0), Negate);
14085 if (!Subtarget->hasVMLxForwarding())
14104 EVT VT =
N->getValueType(0);
14115 EVT VT =
N->getValueType(0);
14116 if (VT != MVT::v2i64)
14127 return Op->getOperand(0);
14141 And =
And->getOperand(0);
14146 Mask = Mask->getOperand(0);
14149 Mask.getValueType() != MVT::v4i32)
14155 return And->getOperand(0);
14160 if (
SDValue Op0 = IsSignExt(N0)) {
14161 if (
SDValue Op1 = IsSignExt(N1)) {
14162 SDValue New0a = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14163 SDValue New1a = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14164 return DAG.
getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14167 if (
SDValue Op0 = IsZeroExt(N0)) {
14168 if (
SDValue Op1 = IsZeroExt(N1)) {
14169 SDValue New0a = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14170 SDValue New1a = DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14171 return DAG.
getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14183 EVT VT =
N->getValueType(0);
14184 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14195 if (VT != MVT::i32)
14202 int64_t MulAmt =
C->getSExtValue();
14205 ShiftAmt = ShiftAmt & (32 - 1);
14210 MulAmt >>= ShiftAmt;
14271 if (
N->getValueType(0) != MVT::i32)
14280 if (C1 == 255 || C1 == 65535)
14283 SDNode *N0 =
N->getOperand(0).getNode();
14297 if (!C2 || C2 >= 32)
14341 if (Trailing == C2 && C2 + C3 < 32) {
14354 if (Leading == C2 && C2 + C3 < 32) {
14382 EVT VT =
N->getValueType(0);
14386 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14389 APInt SplatBits, SplatUndef;
14390 unsigned SplatBitSize;
14392 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14393 BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14394 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14395 SplatBitSize == 64) {
14402 DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT,
N->getOperand(0));
14404 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14429 if (!Subtarget->hasV6Ops() ||
14430 (Subtarget->isThumb() &&
14431 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14434 SDValue SRL = OR->getOperand(0);
14435 SDValue SHL = OR->getOperand(1);
14438 SRL = OR->getOperand(1);
14439 SHL = OR->getOperand(0);
14446 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14450 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14451 if (SRL.getOperand(0) !=
SDValue(SMULLOHI, 0) ||
14452 SHL.getOperand(0) !=
SDValue(SMULLOHI, 1))
14471 unsigned Opcode = 0;
14472 if (
isS16(OpS16, DAG))
14473 Opcode = ARMISD::SMULWB;
14475 Opcode = ARMISD::SMULWT;
14490 if (Subtarget->
isThumb1Only() || !Subtarget->hasV6T2Ops())
14493 EVT VT =
N->getValueType(0);
14508 if (VT != MVT::i32)
14521 if (Mask == 0xffff)
14528 if ((Val & ~Mask) != Val)
14534 Res = DAG.
getNode(ARMISD::BFI,
DL, VT, N00,
14553 (Mask == ~Mask2)) {
14556 if (Subtarget->hasDSP() &&
14557 (Mask == 0xffff || Mask == 0xffff0000))
14563 Res = DAG.
getNode(ARMISD::BFI,
DL, VT, N00, Res,
14570 (~Mask == Mask2)) {
14573 if (Subtarget->hasDSP() &&
14574 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14630 if (
N->getOpcode() == ARMISD::VCMP)
14632 else if (
N->getOpcode() == ARMISD::VCMPZ)
14640 return isValidMVECond(CC,
N->getOperand(0).getValueType().isFloatingPoint());
14647 EVT VT =
N->getValueType(0);
14652 auto IsFreelyInvertable = [&](
SDValue V) {
14653 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14659 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14677 if (AndOp.getOpcode() !=
ISD::AND)
14681 SDValue Mask = AndOp.getOperand(1);
14691 bool IsShiftRight =
false;
14694 if (ShiftOp.
getOpcode() == ARMISD::VSHRuIMM) {
14695 IsShiftRight =
true;
14698 }
else if (ShiftOp.
getOpcode() == ARMISD::VSHLIMM) {
14706 APInt RequiredMask = IsShiftRight
14709 if (MaskBits != RequiredMask)
14712 unsigned Opc = IsShiftRight ? ARMISD::VSRIIMM : ARMISD::VSLIIMM;
14722 EVT VT =
N->getValueType(0);
14728 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14729 VT == MVT::v8i1 || VT == MVT::v16i1))
14732 APInt SplatBits, SplatUndef;
14733 unsigned SplatBitSize;
14735 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14736 BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14737 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14738 SplatBitSize == 64) {
14745 DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT,
N->getOperand(0));
14747 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14767 (Subtarget->hasMVEIntegerOps() &&
14768 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32)))) {
14771 return ShiftInsert;
14775 return ShiftInsert;
14789 unsigned SplatBitSize;
14792 APInt SplatBits0, SplatBits1;
14796 if (BVN0 && BVN0->
isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14797 HasAnyUndefs) && !HasAnyUndefs) {
14798 if (BVN1 && BVN1->
isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14799 HasAnyUndefs) && !HasAnyUndefs) {
14804 SplatBits0 == ~SplatBits1) {
14812 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14832 if (CSINC.
getOpcode() != ARMISD::CSINC)
14834 if (CSINC.
getOpcode() == ARMISD::CSINC &&
14847 EVT VT =
N->getValueType(0);
14862 if (Subtarget->hasMVEIntegerOps()) {
14890 assert(
N->getOpcode() == ARMISD::BFI);
14893 ToMask =
~N->getConstantOperandAPInt(2);
14913 unsigned LastActiveBitInA =
A.countr_zero();
14914 unsigned FirstActiveBitInB =
B.getBitWidth() -
B.countl_zero() - 1;
14915 return LastActiveBitInA - 1 == FirstActiveBitInB;
14920 APInt ToMask, FromMask;
14925 if (V.getOpcode() != ARMISD::BFI)
14928 APInt NewToMask, NewFromMask;
14930 if (NewFrom != From)
14934 if ((NewToMask & ToMask).getBoolValue())
14959 unsigned InvMask =
N->getConstantOperandVal(2);
14963 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14964 "undefined behavior");
14965 unsigned Mask = (1u << Width) - 1;
14967 if ((Mask & (~Mask2)) == 0)
14969 N->getOperand(0), N1.
getOperand(0),
N->getOperand(2));
14976 APInt ToMask1, FromMask1;
14979 APInt ToMask2, FromMask2;
14985 APInt NewFromMask = FromMask1 | FromMask2;
14986 APInt NewToMask = ToMask1 | ToMask2;
14988 EVT VT =
N->getValueType(0);
14991 if (NewFromMask[0] == 0)
14994 return DAG.
getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
15002 if (
N->getOperand(0).getOpcode() == ARMISD::BFI) {
15003 APInt ToMask1 =
~N->getConstantOperandAPInt(2);
15004 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
15006 if (!N0.
hasOneUse() || (ToMask1 & ToMask2) != 0 ||
15010 EVT VT =
N->getValueType(0);
15013 N->getOperand(1),
N->getOperand(2));
15025 if (Cmp->getOpcode() != ARMISD::CMPZ || !
isNullConstant(Cmp->getOperand(1)))
15027 SDValue CSInc = Cmp->getOperand(0);
15037 if (CSInc.
getOpcode() == ARMISD::CSINC &&
15077 if (
N->getConstantOperandVal(2) ==
ARMCC::EQ)
15078 return DAG.
getNode(
N->getOpcode(),
SDLoc(
N), MVT::i32,
N->getOperand(0),
15081 if (
N->getConstantOperandVal(2) ==
ARMCC::NE)
15083 N->getOpcode(),
SDLoc(
N), MVT::i32,
N->getOperand(0),
15096 SDValue InDouble =
N->getOperand(0);
15097 if (InDouble.
getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15111 SDValue BasePtr = LD->getBasePtr();
15113 DAG.
getLoad(MVT::i32,
DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15114 LD->getAlign(), LD->getMemOperand()->getFlags());
15120 LD->getPointerInfo().getWithOffset(4),
15122 LD->getMemOperand()->getFlags());
15141 BV.
getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15155 if (!Subtarget->
isLittle() && BVSwap)
15173 if (!Subtarget->
isLittle() && BVSwap)
15192 if (Op0.
getOpcode() == ARMISD::VMOVRRD &&
15205 if (Op0->
getOpcode() == ARMISD::VMOVrh)
15218 if (Copy.getValueType() == MVT::f32 &&
15220 bool HasGlue = Copy->getNumOperands() == 3;
15221 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15222 HasGlue ? Copy->getOperand(2) :
SDValue()};
15223 EVT OutTys[] = {
N->getValueType(0), MVT::Other, MVT::Glue};
15242 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15243 LN0->getMemoryVT() == MVT::i16) {
15246 LN0->getBasePtr(), LN0->getMemOperand());
15264 EVT VT =
N->getValueType(0);
15298 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
15299 for (
unsigned i = 0; i < NumElts; ++i) {
15300 SDNode *Elt =
N->getOperand(i).getNode();
15317 if (
N->getNumOperands() == 2)
15323 EVT VT =
N->getValueType(0);
15329 for (
unsigned i = 0; i < NumElts; ++i) {
15355 EVT VT =
N->getValueType(0);
15363 assert(EltVT == MVT::f32 &&
"Unexpected type!");
15368 Use->getValueType(0).isFloatingPoint())
15376 unsigned NumOfBitCastedElts = 0;
15378 unsigned NumOfRelevantElts = NumElts;
15379 for (
unsigned Idx = 0; Idx < NumElts; ++Idx) {
15384 ++NumOfBitCastedElts;
15388 --NumOfRelevantElts;
15392 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15410 for (
unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15415 V->getOperand(0).getValueType() == MVT::i32)
15417 V = V.getOperand(0);
15434 EVT VT =
N->getValueType(0);
15439 if (
Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15441 if (
Op->getOperand(0).getValueType() == VT)
15442 return Op->getOperand(0);
15443 return DCI.
DAG.
getNode(ARMISD::PREDICATE_CAST, dl, VT,
Op->getOperand(0));
15450 DCI.
DAG.
getNode(ARMISD::PREDICATE_CAST, dl, VT,
Op->getOperand(0));
15457 if (
Op.getValueType() == MVT::i32) {
15468 EVT VT =
N->getValueType(0);
15473 if (ST->isLittle())
15477 if (
Op.getValueType() == VT)
15484 if (
Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15486 if (
Op->getOperand(0).getValueType() == VT)
15487 return Op->getOperand(0);
15488 return DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, VT,
Op->getOperand(0));
15496 if (!Subtarget->hasMVEIntegerOps())
15499 EVT VT =
N->getValueType(0);
15507 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, Op0,
N->getOperand(2));
15513 return DAG.
getNode(ARMISD::VCMPZ, dl, VT, Op1,
15517 return DAG.
getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15530 EVT VT =
N->getValueType(0);
15531 SDNode *Elt =
N->getOperand(1).getNode();
15546 Vec, V,
N->getOperand(2));
15556 EVT VT =
N->getValueType(0);
15584 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15585 isa<ConstantSDNode>(V->getOperand(1)) &&
15586 V->getConstantOperandVal(1) == Lane + 1 &&
15587 V->getOperand(0).getResNo() == ResNo;
15589 if (OtherIt == Op0->
users().
end())
15594 SDValue OtherExt(*OtherIt, 0);
15606 DCI.
DAG.
getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15609 DCI.
DAG.
getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32},
F64);
15619 EVT VT =
N->getValueType(0);
15623 if (Op0->
getOpcode() == ARMISD::VDUP) {
15625 if (VT == MVT::f16 &&
X.getValueType() == MVT::i32)
15626 return DCI.
DAG.
getNode(ARMISD::VMOVhr, dl, VT,
X);
15627 if (VT == MVT::i32 &&
X.getValueType() == MVT::f16)
15628 return DCI.
DAG.
getNode(ARMISD::VMOVrh, dl, VT,
X);
15629 if (VT == MVT::f32 &&
X.getValueType() == MVT::i32)
15632 while (
X.getValueType() != VT &&
X->getOpcode() ==
ISD::BITCAST)
15633 X =
X->getOperand(0);
15634 if (
X.getValueType() == VT)
15642 return Op0.
getOperand(
N->getConstantOperandVal(1));
15652 unsigned Offset =
N->getConstantOperandVal(1);
15654 if (MOV.
getOpcode() == ARMISD::VMOVDRR)
15664 unsigned Idx =
N->getConstantOperandVal(1);
15679 unsigned Lane =
N->getConstantOperandVal(1);
15707 EVT VT =
N->getValueType(0);
15710 if (
Op.getOpcode() == ARMISD::VGETLANEu &&
15712 Op.getOperand(0).getValueType().getScalarType())
15713 return DAG.
getNode(ARMISD::VGETLANEs,
SDLoc(
N), VT,
Op.getOperand(0),
15722 SDValue SubVec =
N->getOperand(1);
15723 uint64_t IdxVal =
N->getConstantOperandVal(2);
15734 if (IdxVal == 0 && Vec.
isUndef())
15740 (IdxVal != 0 && IdxVal != NumSubElts))
15771 ARMISD::VMOVN,
DL, VT,
15777 ARMISD::VMOVN,
DL, VT,
15813 EVT VT =
N->getValueType(0);
15824 unsigned HalfElts = NumElts/2;
15826 for (
unsigned n = 0; n < NumElts; ++n) {
15829 if (MaskElt < (
int)HalfElts)
15831 else if (MaskElt >= (
int)NumElts && MaskElt < (
int)(NumElts + HalfElts))
15832 NewElt = HalfElts + MaskElt - NumElts;
15875 bool SimpleConstIncOnly,
15883 bool isLoadOp =
true;
15884 bool isLaneOp =
false;
15887 bool hasAlignment =
true;
15888 unsigned NewOpc = 0;
15889 unsigned NumVecs = 0;
15890 if (
Target.isIntrinsic) {
15891 unsigned IntNo =
N->getConstantOperandVal(1);
15895 case Intrinsic::arm_neon_vld1:
15899 case Intrinsic::arm_neon_vld2:
15903 case Intrinsic::arm_neon_vld3:
15907 case Intrinsic::arm_neon_vld4:
15911 case Intrinsic::arm_neon_vld1x2:
15914 hasAlignment =
false;
15916 case Intrinsic::arm_neon_vld1x3:
15919 hasAlignment =
false;
15921 case Intrinsic::arm_neon_vld1x4:
15924 hasAlignment =
false;
15926 case Intrinsic::arm_neon_vld2dup:
15930 case Intrinsic::arm_neon_vld3dup:
15934 case Intrinsic::arm_neon_vld4dup:
15938 case Intrinsic::arm_neon_vld2lane:
15943 case Intrinsic::arm_neon_vld3lane:
15948 case Intrinsic::arm_neon_vld4lane:
15953 case Intrinsic::arm_neon_vst1:
15958 case Intrinsic::arm_neon_vst2:
15959 NewOpc = ARMISD::VST2_UPD;
15963 case Intrinsic::arm_neon_vst3:
15968 case Intrinsic::arm_neon_vst4:
15969 NewOpc = ARMISD::VST4_UPD;
15973 case Intrinsic::arm_neon_vst2lane:
15979 case Intrinsic::arm_neon_vst3lane:
15985 case Intrinsic::arm_neon_vst4lane:
15991 case Intrinsic::arm_neon_vst1x2:
15995 hasAlignment =
false;
15997 case Intrinsic::arm_neon_vst1x3:
16001 hasAlignment =
false;
16003 case Intrinsic::arm_neon_vst1x4:
16007 hasAlignment =
false;
16012 switch (
N->getOpcode()) {
16048 VecTy =
N->getValueType(0);
16049 }
else if (
Target.isIntrinsic) {
16050 VecTy =
N->getOperand(
Target.AddrOpIdx + 1).getValueType();
16053 "Node has to be a load, a store, or an intrinsic!");
16054 VecTy =
N->getOperand(1).getValueType();
16062 if (isLaneOp || isVLDDUPOp)
16065 if (NumBytes >= 3 * 16 &&
User.ConstInc != NumBytes) {
16071 if (SimpleConstIncOnly &&
User.ConstInc != NumBytes)
16080 EVT AlignedVecTy = VecTy;
16100 assert(NumVecs == 1 &&
"Unexpected multi-element generic load/store.");
16101 assert(!isLaneOp &&
"Unexpected generic load/store lane.");
16112 Alignment =
Align(1);
16118 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16120 for (n = 0; n < NumResultVecs; ++n)
16121 Tys[n] = AlignedVecTy;
16122 Tys[n++] = MVT::i32;
16123 Tys[n] = MVT::Other;
16128 Ops.push_back(
N->getOperand(0));
16129 Ops.push_back(
N->getOperand(
Target.AddrOpIdx));
16134 Ops.push_back(StN->getValue());
16138 unsigned LastOperand =
16139 hasAlignment ?
N->getNumOperands() - 1 :
N->getNumOperands();
16140 for (
unsigned i =
Target.AddrOpIdx + 1; i < LastOperand; ++i)
16141 Ops.push_back(
N->getOperand(i));
16149 if (AlignedVecTy != VecTy &&
N->getOpcode() ==
ISD::STORE) {
16160 for (
unsigned i = 0; i < NumResultVecs; ++i)
16165 if (AlignedVecTy != VecTy &&
N->getOpcode() ==
ISD::LOAD) {
16166 SDValue &LdVal = NewResults[0];
16202 switch (
N->getOpcode()) {
16206 *Ptr =
N->getOperand(0);
16207 *CInc =
N->getOperand(1);
16214 *Ptr =
N->getOperand(1);
16215 *CInc =
N->getOperand(2);
16242 SDValue Addr =
N->getOperand(AddrOpIdx);
16253 unsigned ConstInc =
16258 if (BaseUpdates.
size() >= MaxBaseUpdates)
16279 unsigned UserOffset =
16282 if (!UserOffset || UserOffset <=
Offset)
16285 unsigned NewConstInc = UserOffset -
Offset;
16288 if (BaseUpdates.
size() >= MaxBaseUpdates)
16296 unsigned NumValidUpd = BaseUpdates.
size();
16297 for (
unsigned I = 0;
I < NumValidUpd;
I++) {
16308 return LHS.ConstInc <
RHS.ConstInc;
16337 unsigned IntNo =
N->getConstantOperandVal(1);
16338 if (IntNo == Intrinsic::arm_mve_vst2q &&
N->getConstantOperandVal(5) != 1)
16340 if (IntNo == Intrinsic::arm_mve_vst4q &&
N->getConstantOperandVal(7) != 3)
16363 bool isLoadOp =
true;
16364 unsigned NewOpc = 0;
16365 unsigned NumVecs = 0;
16369 case Intrinsic::arm_mve_vld2q:
16373 case Intrinsic::arm_mve_vld4q:
16377 case Intrinsic::arm_mve_vst2q:
16378 NewOpc = ARMISD::VST2_UPD;
16382 case Intrinsic::arm_mve_vst4q:
16383 NewOpc = ARMISD::VST4_UPD;
16392 VecTy =
N->getValueType(0);
16394 VecTy =
N->getOperand(3).getValueType();
16408 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16410 for (n = 0; n < NumResultVecs; ++n)
16412 Tys[n++] = MVT::i32;
16413 Tys[n] = MVT::Other;
16418 Ops.push_back(
N->getOperand(0));
16419 Ops.push_back(
N->getOperand(2));
16420 Ops.push_back(Inc);
16422 for (
unsigned i = 3; i <
N->getNumOperands(); ++i)
16423 Ops.push_back(
N->getOperand(i));
16430 for (
unsigned i = 0; i < NumResultVecs; ++i)
16449 EVT VT =
N->getValueType(0);
16455 SDNode *VLD =
N->getOperand(0).getNode();
16458 unsigned NumVecs = 0;
16459 unsigned NewOpc = 0;
16461 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16464 }
else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16467 }
else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16479 if (
Use.getResNo() == NumVecs)
16482 if (
User->getOpcode() != ARMISD::VDUPLANE ||
16483 VLDLaneNo !=
User->getConstantOperandVal(1))
16490 for (n = 0; n < NumVecs; ++n)
16492 Tys[n] = MVT::Other;
16502 unsigned ResNo =
Use.getResNo();
16504 if (ResNo == NumVecs)
16511 std::vector<SDValue> VLDDupResults;
16512 for (
unsigned n = 0; n < NumVecs; ++n)
16526 EVT VT =
N->getValueType(0);
16529 if (Subtarget->hasMVEIntegerOps()) {
16533 ExtractVT = MVT::i32;
16535 N->getOperand(0),
N->getOperand(1));
16547 Op =
Op.getOperand(0);
16548 if (
Op.getOpcode() != ARMISD::VMOVIMM &&
Op.getOpcode() != ARMISD::VMVNIMM)
16552 unsigned EltSize =
Op.getScalarValueSizeInBits();
16554 unsigned Imm =
Op.getConstantOperandVal(0);
16570 if (Subtarget->hasMVEIntegerOps()) {
16573 if (
Op.getValueType() == MVT::f32)
16574 return DAG.
getNode(ARMISD::VDUP, dl,
N->getValueType(0),
16576 else if (
Op.getValueType() == MVT::f16)
16577 return DAG.
getNode(ARMISD::VDUP, dl,
N->getValueType(0),
16578 DAG.
getNode(ARMISD::VMOVrh, dl, MVT::i32,
Op));
16581 if (!Subtarget->hasNEON())
16588 if (LD &&
Op.hasOneUse() && LD->isUnindexed() &&
16589 LD->getMemoryVT() ==
N->getValueType(0).getVectorElementType()) {
16590 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16595 LD->getMemoryVT(), LD->getMemOperand());
16606 EVT VT =
N->getValueType(0);
16628 assert(StVT != VT &&
"Cannot truncate to the same type");
16638 if (0 != (NumElems * FromEltSz) % ToEltSz)
16641 unsigned SizeRatio = FromEltSz / ToEltSz;
16646 NumElems * SizeRatio);
16652 for (
unsigned i = 0; i < NumElems; ++i)
16666 MVT StoreType = MVT::i8;
16668 if (TLI.
isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16688 for (
unsigned I = 0;
I <
E;
I++) {
16719 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16722 unsigned NumElements = 4;
16739 unsigned Off0 = Rev ? NumElts : 0;
16740 unsigned Off1 = Rev ? 0 : NumElts;
16742 for (
unsigned I = 0;
I < NumElts;
I += 2) {
16743 if (M[
I] >= 0 && M[
I] != (
int)(Off0 +
I / 2))
16745 if (M[
I + 1] >= 0 && M[
I + 1] != (
int)(Off1 +
I / 2))
16753 if (isVMOVNShuffle(Shuffle,
false) || isVMOVNShuffle(Shuffle,
true))
16773 unsigned NewOffset = i * NumElements * ToEltVT.
getSizeInBits() / 8;
16784 Extract = DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, MVT::v4i32, FPTrunc);
16788 NewToVT, Alignment, MMOFlags, AAInfo);
16821 unsigned NewOffset =
16829 NewToVT, Alignment, MMOFlags, AAInfo);
16851 {Extract.getOperand(0), Extract.getOperand(1)});
16882 if (Subtarget->hasNEON())
16886 if (Subtarget->hasMVEFloatOps())
16890 if (Subtarget->hasMVEIntegerOps()) {
16965 if (!Subtarget->hasNEON())
16969 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
16977 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
16979 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
16980 uint32_t IntBits = IntTy.getSizeInBits();
16981 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
16982 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16993 if (
C == -1 ||
C == 0 ||
C > 32)
16998 unsigned IntrinsicOpcode =
isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16999 Intrinsic::arm_neon_vcvtfp2fxu;
17002 DAG.
getConstant(IntrinsicOpcode, dl, MVT::i32),
Op->getOperand(0),
17005 if (IntBits < FloatBits)
17013 if (!Subtarget->hasMVEFloatOps())
17021 EVT VT =
N->getValueType(0);
17026 auto isIdentitySplat = [&](
SDValue Op,
bool NSZ) {
17028 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
17030 uint64_t ImmVal =
Op.getOperand(0).getConstantOperandVal(0);
17031 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
17033 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
17046 if (!isIdentitySplat(Op1.
getOperand(2), NSZ))
17057 EVT VT =
N->getValueType(0);
17060 if (!
N->getFlags().hasAllowReassociation())
17067 unsigned Opc =
A.getConstantOperandVal(0);
17068 if (
Opc != Intrinsic::arm_mve_vcmlaq)
17073 A.getOperand(3),
A.getOperand(4));
17105 if (!Subtarget->hasNEON())
17109 unsigned OpOpcode =
Op.getNode()->getOpcode();
17110 if (!
N->getValueType(0).isVector() || !
N->getValueType(0).isSimple() ||
17114 SDValue ConstVec =
N->getOperand(1);
17118 MVT FloatTy =
N->getSimpleValueType(0).getVectorElementType();
17120 MVT IntTy =
Op.getOperand(0).getSimpleValueType().getVectorElementType();
17121 uint32_t IntBits = IntTy.getSizeInBits();
17122 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
17123 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17143 int32_t
C = IntVal.exactLogBase2();
17144 if (
C == -1 ||
C == 0 ||
C > 32)
17150 if (IntBits < FloatBits)
17152 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17154 unsigned IntrinsicOpcode =
isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17155 : Intrinsic::arm_neon_vcvtfxu2fp;
17163 if (!ST->hasMVEIntegerOps())
17167 EVT ResVT =
N->getValueType(0);
17195 EVT AVT =
A.getValueType();
17201 auto ExtendIfNeeded = [&](
SDValue A,
unsigned ExtendCode) {
17202 EVT AVT =
A.getValueType();
17212 auto IsVADDV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes) {
17213 if (ResVT != RetTy || N0->
getOpcode() != ExtendCode)
17216 if (ExtTypeMatches(
A, ExtTypes))
17217 return ExtendIfNeeded(
A, ExtendCode);
17220 auto IsPredVADDV = [&](
MVT RetTy,
unsigned ExtendCode,
17230 if (ExtTypeMatches(
A, ExtTypes))
17231 return ExtendIfNeeded(
A, ExtendCode);
17234 auto IsVMLAV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes,
17244 if (ResVT != RetTy)
17247 if (
Mul->getOpcode() == ExtendCode &&
17248 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17250 Mul =
Mul->getOperand(0);
17259 if (ExtTypeMatches(
A, ExtTypes) && ExtTypeMatches(
B, ExtTypes)) {
17260 A = ExtendIfNeeded(
A, ExtendCode);
17261 B = ExtendIfNeeded(
B, ExtendCode);
17266 auto IsPredVMLAV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes,
17279 if (
Mul->getOpcode() == ExtendCode &&
17280 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17282 Mul =
Mul->getOperand(0);
17291 if (ExtTypeMatches(
A, ExtTypes) && ExtTypeMatches(
B, ExtTypes)) {
17292 A = ExtendIfNeeded(
A, ExtendCode);
17293 B = ExtendIfNeeded(
B, ExtendCode);
17304 EVT VT =
Ops[0].getValueType();
17305 if (VT == MVT::v16i8) {
17306 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17307 "Unexpected illegal long reduction opcode");
17308 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17320 DAG.
getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17333 return DAG.
getNode(ARMISD::VMLAVs, dl, ResVT,
A,
B);
17335 return DAG.
getNode(ARMISD::VMLAVu, dl, ResVT,
A,
B);
17336 if (IsVMLAV(MVT::i64,
ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17338 return Create64bitNode(ARMISD::VMLALVs, {
A,
B});
17339 if (IsVMLAV(MVT::i64,
ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17341 return Create64bitNode(ARMISD::VMLALVu, {
A,
B});
17344 DAG.
getNode(ARMISD::VMLAVs, dl, MVT::i32,
A,
B));
17347 DAG.
getNode(ARMISD::VMLAVu, dl, MVT::i32,
A,
B));
17351 return DAG.
getNode(ARMISD::VMLAVps, dl, ResVT,
A,
B, Mask);
17354 return DAG.
getNode(ARMISD::VMLAVpu, dl, ResVT,
A,
B, Mask);
17357 return Create64bitNode(ARMISD::VMLALVps, {
A,
B, Mask});
17360 return Create64bitNode(ARMISD::VMLALVpu, {
A,
B, Mask});
17363 DAG.
getNode(ARMISD::VMLAVps, dl, MVT::i32,
A,
B, Mask));
17366 DAG.
getNode(ARMISD::VMLAVpu, dl, MVT::i32,
A,
B, Mask));
17369 return DAG.
getNode(ARMISD::VADDVs, dl, ResVT,
A);
17371 return DAG.
getNode(ARMISD::VADDVu, dl, ResVT,
A);
17373 return Create64bitNode(ARMISD::VADDLVs, {
A});
17375 return Create64bitNode(ARMISD::VADDLVu, {
A});
17378 DAG.
getNode(ARMISD::VADDVs, dl, MVT::i32,
A));
17381 DAG.
getNode(ARMISD::VADDVu, dl, MVT::i32,
A));
17384 return DAG.
getNode(ARMISD::VADDVps, dl, ResVT,
A, Mask);
17386 return DAG.
getNode(ARMISD::VADDVpu, dl, ResVT,
A, Mask);
17388 return Create64bitNode(ARMISD::VADDLVps, {
A, Mask});
17390 return Create64bitNode(ARMISD::VADDLVpu, {
A, Mask});
17393 DAG.
getNode(ARMISD::VADDVps, dl, MVT::i32,
A, Mask));
17396 DAG.
getNode(ARMISD::VADDVpu, dl, MVT::i32,
A, Mask));
17403 Op =
Op->getOperand(1);
17405 Op->getOperand(0)->getOpcode() ==
ISD::MUL) {
17407 if (
Mul->getOperand(0) ==
Mul->getOperand(1) &&
17424 unsigned VecOp =
N->getOperand(0).getValueType().isVector() ? 0 : 2;
17426 if (!Shuf || !Shuf->getOperand(1).isUndef())
17431 APInt SetElts(Mask.size(), 0);
17432 for (
int E : Mask) {
17440 if (
N->getNumOperands() != VecOp + 1) {
17442 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17448 if (
Op.getValueType().isVector())
17449 Ops.push_back(
Op.getOperand(0));
17460 unsigned IsTop =
N->getConstantOperandVal(2);
17467 if (Op0->
isUndef() && !IsTop)
17472 if ((Op1->
getOpcode() == ARMISD::VQMOVNs ||
17473 Op1->
getOpcode() == ARMISD::VQMOVNu) &&
17481 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
17483 APInt Op0DemandedElts =
17484 IsTop ? Op1DemandedElts
17499 unsigned IsTop =
N->getConstantOperandVal(2);
17501 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
17502 APInt Op0DemandedElts =
17514 EVT VT =
N->getValueType(0);
17521 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17522 LHS.getOperand(1).isUndef() &&
RHS.getOperand(1).isUndef() &&
17526 LHS.getOperand(0),
RHS.getOperand(0));
17541 int ShiftAmt =
C->getSExtValue();
17542 if (ShiftAmt == 0) {
17548 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17549 unsigned NewOpcode =
17550 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17565 unsigned IntNo =
N->getConstantOperandVal(0);
17576 case Intrinsic::arm_neon_vshifts:
17577 case Intrinsic::arm_neon_vshiftu:
17578 case Intrinsic::arm_neon_vrshifts:
17579 case Intrinsic::arm_neon_vrshiftu:
17580 case Intrinsic::arm_neon_vrshiftn:
17581 case Intrinsic::arm_neon_vqshifts:
17582 case Intrinsic::arm_neon_vqshiftu:
17583 case Intrinsic::arm_neon_vqshiftsu:
17584 case Intrinsic::arm_neon_vqshiftns:
17585 case Intrinsic::arm_neon_vqshiftnu:
17586 case Intrinsic::arm_neon_vqshiftnsu:
17587 case Intrinsic::arm_neon_vqrshiftns:
17588 case Intrinsic::arm_neon_vqrshiftnu:
17589 case Intrinsic::arm_neon_vqrshiftnsu: {
17590 EVT VT =
N->getOperand(1).getValueType();
17592 unsigned VShiftOpc = 0;
17595 case Intrinsic::arm_neon_vshifts:
17596 case Intrinsic::arm_neon_vshiftu:
17598 VShiftOpc = ARMISD::VSHLIMM;
17601 if (
isVShiftRImm(
N->getOperand(2), VT,
false,
true, Cnt)) {
17602 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17603 : ARMISD::VSHRuIMM);
17608 case Intrinsic::arm_neon_vrshifts:
17609 case Intrinsic::arm_neon_vrshiftu:
17614 case Intrinsic::arm_neon_vqshifts:
17615 case Intrinsic::arm_neon_vqshiftu:
17620 case Intrinsic::arm_neon_vqshiftsu:
17625 case Intrinsic::arm_neon_vrshiftn:
17626 case Intrinsic::arm_neon_vqshiftns:
17627 case Intrinsic::arm_neon_vqshiftnu:
17628 case Intrinsic::arm_neon_vqshiftnsu:
17629 case Intrinsic::arm_neon_vqrshiftns:
17630 case Intrinsic::arm_neon_vqrshiftnu:
17631 case Intrinsic::arm_neon_vqrshiftnsu:
17643 case Intrinsic::arm_neon_vshifts:
17644 case Intrinsic::arm_neon_vshiftu:
17647 case Intrinsic::arm_neon_vrshifts:
17648 VShiftOpc = ARMISD::VRSHRsIMM;
17650 case Intrinsic::arm_neon_vrshiftu:
17651 VShiftOpc = ARMISD::VRSHRuIMM;
17653 case Intrinsic::arm_neon_vrshiftn:
17654 VShiftOpc = ARMISD::VRSHRNIMM;
17656 case Intrinsic::arm_neon_vqshifts:
17657 VShiftOpc = ARMISD::VQSHLsIMM;
17659 case Intrinsic::arm_neon_vqshiftu:
17660 VShiftOpc = ARMISD::VQSHLuIMM;
17662 case Intrinsic::arm_neon_vqshiftsu:
17663 VShiftOpc = ARMISD::VQSHLsuIMM;
17665 case Intrinsic::arm_neon_vqshiftns:
17666 VShiftOpc = ARMISD::VQSHRNsIMM;
17668 case Intrinsic::arm_neon_vqshiftnu:
17669 VShiftOpc = ARMISD::VQSHRNuIMM;
17671 case Intrinsic::arm_neon_vqshiftnsu:
17672 VShiftOpc = ARMISD::VQSHRNsuIMM;
17674 case Intrinsic::arm_neon_vqrshiftns:
17675 VShiftOpc = ARMISD::VQRSHRNsIMM;
17677 case Intrinsic::arm_neon_vqrshiftnu:
17678 VShiftOpc = ARMISD::VQRSHRNuIMM;
17680 case Intrinsic::arm_neon_vqrshiftnsu:
17681 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17686 return DAG.
getNode(VShiftOpc, dl,
N->getValueType(0),
17687 N->getOperand(1), DAG.
getConstant(Cnt, dl, MVT::i32));
17690 case Intrinsic::arm_neon_vshiftins: {
17691 EVT VT =
N->getOperand(1).getValueType();
17693 unsigned VShiftOpc = 0;
17696 VShiftOpc = ARMISD::VSLIIMM;
17697 else if (
isVShiftRImm(
N->getOperand(3), VT,
false,
true, Cnt))
17698 VShiftOpc = ARMISD::VSRIIMM;
17704 return DAG.
getNode(VShiftOpc, dl,
N->getValueType(0),
17705 N->getOperand(1),
N->getOperand(2),
17709 case Intrinsic::arm_neon_vqrshifts:
17710 case Intrinsic::arm_neon_vqrshiftu:
17714 case Intrinsic::arm_neon_vbsl: {
17716 return DAG.
getNode(ARMISD::VBSP, dl,
N->getValueType(0),
N->getOperand(1),
17717 N->getOperand(2),
N->getOperand(3));
17719 case Intrinsic::arm_mve_vqdmlah:
17720 case Intrinsic::arm_mve_vqdmlash:
17721 case Intrinsic::arm_mve_vqrdmlah:
17722 case Intrinsic::arm_mve_vqrdmlash:
17723 case Intrinsic::arm_mve_vmla_n_predicated:
17724 case Intrinsic::arm_mve_vmlas_n_predicated:
17725 case Intrinsic::arm_mve_vqdmlah_predicated:
17726 case Intrinsic::arm_mve_vqdmlash_predicated:
17727 case Intrinsic::arm_mve_vqrdmlah_predicated:
17728 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17733 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
17740 case Intrinsic::arm_mve_minv:
17741 case Intrinsic::arm_mve_maxv:
17742 case Intrinsic::arm_mve_minav:
17743 case Intrinsic::arm_mve_maxav:
17744 case Intrinsic::arm_mve_minv_predicated:
17745 case Intrinsic::arm_mve_maxv_predicated:
17746 case Intrinsic::arm_mve_minav_predicated:
17747 case Intrinsic::arm_mve_maxav_predicated: {
17750 unsigned BitWidth =
N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17757 case Intrinsic::arm_mve_addv: {
17760 bool Unsigned =
N->getConstantOperandVal(2);
17761 unsigned Opc =
Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17765 case Intrinsic::arm_mve_addlv:
17766 case Intrinsic::arm_mve_addlv_predicated: {
17769 bool Unsigned =
N->getConstantOperandVal(2);
17770 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17771 (
Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17772 (
Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17775 for (
unsigned i = 1, e =
N->getNumOperands(); i < e; i++)
17777 Ops.push_back(
N->getOperand(i));
17790 EVT VT =
Y.getValueType();
17793 if (Subtarget->hasMVEIntegerOps())
17795 if (Subtarget->hasNEON())
17809 EVT VT =
N->getValueType(0);
17811 if (ST->isThumb1Only() &&
N->getOpcode() ==
ISD::SHL && VT == MVT::i32 &&
17812 N->getOperand(0)->getOpcode() ==
ISD::AND &&
17813 N->getOperand(0)->hasOneUse()) {
17830 if (AndMask == 255 || AndMask == 65535)
17834 if (MaskedBits > ShiftAmt) {
17849 if (ST->hasMVEIntegerOps())
17854 switch (
N->getOpcode()) {
17860 return DAG.
getNode(ARMISD::VSHLIMM, dl, VT,
N->getOperand(0),
17867 if (
isVShiftRImm(
N->getOperand(1), VT,
false,
false, Cnt)) {
17868 unsigned VShiftOpc =
17869 (
N->getOpcode() ==
ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17871 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0),
17887 if (!LD->isSimple() || !N0.
hasOneUse() || LD->isIndexed() ||
17890 EVT FromVT = LD->getValueType(0);
17891 EVT ToVT =
N->getValueType(0);
17898 unsigned NumElements = 0;
17899 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17901 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17903 if (NumElements == 0 ||
17913 SDValue BasePtr = LD->getBasePtr();
17914 Align Alignment = LD->getBaseAlign();
17935 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17936 Alignment, MMOFlags, AAInfo);
17942 if (FromEltVT == MVT::f16) {
17945 for (
unsigned i = 0; i < Loads.
size(); i++) {
17947 DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, MVT::v8f16, Loads[i]);
17966 EVT VT =
N->getValueType(0);
17973 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17980 if (VT == MVT::i32 &&
17981 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17986 switch (
N->getOpcode()) {
17989 Opc = ARMISD::VGETLANEs;
17993 Opc = ARMISD::VGETLANEu;
18000 if (ST->hasMVEIntegerOps())
18018 Ops.push_back(Ext);
18028 if (ST->hasMVEFloatOps())
18039 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
18043 EVT VT =
Op.getValueType();
18046 if (VT != MVT::i32 ||
18059 APInt MaxC = Max.getConstantOperandAPInt(1);
18062 !(MinC + 1).isPowerOf2())
18080 EVT VT =
N->getValueType(0);
18083 if (VT == MVT::i32)
18086 if (!ST->hasMVEIntegerOps())
18092 if (VT != MVT::v4i32 && VT != MVT::v8i16)
18095 auto IsSignedSaturate = [&](
SDNode *Min,
SDNode *Max) {
18103 if (VT == MVT::v4i32)
18104 SaturateC =
APInt(32, (1 << 15) - 1,
true);
18106 SaturateC =
APInt(16, (1 << 7) - 1,
true);
18113 MaxC != ~SaturateC)
18118 if (IsSignedSaturate(
N, N0.
getNode())) {
18121 if (VT == MVT::v4i32) {
18122 HalfVT = MVT::v8i16;
18123 ExtVT = MVT::v4i16;
18125 HalfVT = MVT::v16i8;
18140 auto IsUnsignedSaturate = [&](
SDNode *Min) {
18146 if (VT == MVT::v4i32)
18147 SaturateC =
APInt(32, (1 << 16) - 1,
true);
18149 SaturateC =
APInt(16, (1 << 8) - 1,
true);
18158 if (IsUnsignedSaturate(
N)) {
18162 if (VT == MVT::v4i32) {
18163 HalfVT = MVT::v8i16;
18164 ExtConst = 0x0000FFFF;
18166 HalfVT = MVT::v16i8;
18188 const APInt *CV = &
C->getAPIntValue();
18245 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18252 if ((OrCI & Known.
Zero) != OrCI)
18258 EVT VT =
X.getValueType();
18259 unsigned BitInX = AndC->
logBase2();
18267 for (
unsigned BitInY = 0, NumActiveBits = OrCI.
getActiveBits();
18268 BitInY < NumActiveBits; ++BitInY) {
18269 if (OrCI[BitInY] == 0)
18272 Mask.setBit(BitInY);
18273 V = DAG.
getNode(ARMISD::BFI, dl, VT, V,
X,
18289 switch (
N->getOpcode()) {
18304 if (Const->isZero())
18306 else if (Const->isOne())
18314 unsigned IntOp =
N.getConstantOperandVal(1);
18315 if (IntOp != Intrinsic::test_start_loop_iterations &&
18316 IntOp != Intrinsic::loop_decrement_reg)
18342 bool Negate =
false;
18348 Cond =
N->getOperand(1);
18349 Dest =
N->getOperand(2);
18353 Cond =
N->getOperand(2);
18354 Dest =
N->getOperand(4);
18356 if (!Const->isOne() && !Const->isZero())
18358 Imm = Const->getZExtValue();
18386 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18387 "unsupported condition");
18392 unsigned IntOp =
Int->getConstantOperandVal(1);
18393 assert((
N->hasOneUse() &&
N->user_begin()->getOpcode() ==
ISD::BR) &&
18394 "expected single br user");
18395 SDNode *Br = *
N->user_begin();
18405 if (IntOp == Intrinsic::test_start_loop_iterations) {
18407 SDValue Setup = DAG.
getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18409 if (IsTrueIfZero(CC, Imm)) {
18411 Res = DAG.
getNode(ARMISD::WLS, dl, MVT::Other,
Ops);
18415 UpdateUncondBr(Br, Dest, DAG);
18417 SDValue Ops[] = {Chain, Setup, OtherTarget};
18418 Res = DAG.
getNode(ARMISD::WLS, dl, MVT::Other,
Ops);
18430 DAG.
getVTList(MVT::i32, MVT::Other), Args);
18434 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18438 if (
Target == OtherTarget)
18439 UpdateUncondBr(Br, Dest, DAG);
18445 return DAG.
getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18454 if (Cmp.getOpcode() != ARMISD::CMPZ)
18459 SDValue LHS = Cmp.getOperand(0);
18460 SDValue RHS = Cmp.getOperand(1);
18469 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18470 LHS->getOperand(0)->hasOneUse() &&
18474 return DAG.
getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18486 EVT VT =
N->getValueType(0);
18487 SDValue FalseVal =
N->getOperand(0);
18488 SDValue TrueVal =
N->getOperand(1);
18496 matchCSET(Opcode, InvertCond, TrueVal, FalseVal, Subtarget)) {
18503 return DAG.
getNode(Opcode, dl, VT, CSetOp, CSetOp, ARMcc, Cmp);
18506 if (Cmp.getOpcode() != ARMISD::CMPZ)
18510 SDValue LHS = Cmp.getOperand(0);
18511 SDValue RHS = Cmp.getOperand(1);
18515 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18539 if (CC ==
ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18540 Res = DAG.
getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18541 }
else if (CC ==
ARMCC::EQ && TrueVal == RHS) {
18544 Res = DAG.
getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18549 if (CC ==
ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18552 return DAG.
getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18553 LHS->getOperand(2), LHS->getOperand(3));
18563 if (
N->getConstantOperandVal(2) ==
ARMCC::EQ ||
18567 if (
N->getConstantOperandVal(2) ==
ARMCC::NE)
18569 return DAG.
getNode(
N->getOpcode(),
SDLoc(
N), MVT::i32,
N->getOperand(0),
18578 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18611 Res = DAG.
getNode(ARMISD::CMOV, dl, VT,
Sub, TrueVal, ARMcc,
18623 Res = DAG.
getNode(ARMISD::CMOV, dl, VT,
Sub, FalseVal,
18643 const APInt *TrueConst;
18644 if (Subtarget->isThumb1Only() && CC ==
ARMCC::NE &&
18645 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18646 FalseVal.getOperand(1) == RHS) ||
18650 unsigned ShiftAmount = TrueConst->
logBase2();
18665 if (Known.
Zero == 0xfffffffe)
18668 else if (Known.
Zero == 0xffffff00)
18671 else if (Known.
Zero == 0xffff0000)
18684 EVT DstVT =
N->getValueType(0);
18687 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18688 EVT SrcVT = Src.getValueType();
18690 return DAG.
getNode(ARMISD::VDUP,
SDLoc(
N), DstVT, Src.getOperand(0));
18695 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18696 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18697 Src.getValueType().getScalarSizeInBits())
18698 Src = Src.getOperand(0);
18702 EVT SrcVT = Src.getValueType();
18703 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18704 Src.getOpcode() == ARMISD::VMVNIMM ||
18705 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18708 return DAG.
getNode(ARMISD::VECTOR_REG_CAST,
SDLoc(
N), DstVT, Src);
18722 EVT VT =
N->getValueType(0);
18730 if (
N->getNumOperands() == 2 &&
18734 N->getOperand(0).getOperand(1),
18735 N->getOperand(1).getOperand(0),
18736 N->getOperand(1).getOperand(1));
18739 if (
N->getNumOperands() == 2 &&
18745 if (S0->getOperand(0) ==
S1->getOperand(0) &&
18746 S0->getOperand(1) ==
S1->getOperand(1)) {
18749 Mask.append(
S1->getMask().begin(),
S1->getMask().end());
18753 ARMISD::VMOVN,
DL, VT,
18754 DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, VT, S0->getOperand(0)),
18755 DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, VT, S0->getOperand(1)),
18759 ARMISD::VMOVN,
DL, VT,
18760 DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, VT, S0->getOperand(1)),
18761 DAG.
getNode(ARMISD::VECTOR_REG_CAST,
DL, VT, S0->getOperand(0)),
18769 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18770 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18771 (Op.getOpcode() == ISD::BITCAST &&
18772 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18775 for (
unsigned Op = 0;
Op <
N->getNumOperands();
Op++) {
18777 for (
unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18795 int NumIns =
N->getNumOperands();
18796 assert((NumIns == 2 || NumIns == 4) &&
18797 "Expected 2 or 4 inputs to an MVETrunc");
18799 if (
N->getNumOperands() == 4)
18803 for (
int I = 0;
I < NumIns;
I++) {
18805 ISD::ADD,
DL, StackPtr.getValueType(), StackPtr,
18810 Ptr, MPI, StoreVT,
Align(4));
18825 if (!LD || !LD->isSimple() || !N0.
hasOneUse() || LD->isIndexed())
18828 EVT FromVT = LD->getMemoryVT();
18829 EVT ToVT =
N->getValueType(0);
18836 unsigned NumElements = 0;
18837 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18839 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18841 assert(NumElements != 0);
18847 LD->getExtensionType() != NewExtType)
18854 SDValue BasePtr = LD->getBasePtr();
18855 Align Alignment = LD->getBaseAlign();
18874 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18875 Alignment, MMOFlags, AAInfo);
18891 EVT VT =
N->getValueType(0);
18893 assert(
N->getNumValues() == 2 &&
"Expected MVEEXT with 2 elements");
18894 assert((VT == MVT::v4i32 || VT == MVT::v8i16) &&
"Unexpected MVEEXT type");
18896 EVT ExtVT =
N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18898 auto Extend = [&](
SDValue V) {
18907 if (
N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18908 SDValue Ext = Extend(
N->getOperand(0));
18916 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18917 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18921 auto CheckInregMask = [&](
int Start,
int Offset) {
18923 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 +
Offset)
18929 if (CheckInregMask(0, 0))
18931 else if (CheckInregMask(0, 1))
18932 V0 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op0));
18933 else if (CheckInregMask(0, Mask.size()))
18935 else if (CheckInregMask(0, Mask.size() + 1))
18936 V0 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op1));
18941 V1 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op1));
18945 V1 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op0));
18952 if (
N->getOperand(0)->getOpcode() ==
ISD::LOAD)
18963 int NumOuts =
N->getNumValues();
18964 assert((NumOuts == 2 || NumOuts == 4) &&
18965 "Expected 2 or 4 outputs to an MVEEXT");
18966 EVT LoadVT =
N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18968 if (
N->getNumOperands() == 4)
18974 StackPtr, MPI,
Align(4));
18977 for (
int I = 0;
I < NumOuts;
I++) {
18979 ISD::ADD,
DL, StackPtr.getValueType(), StackPtr,
18980 DAG.
getConstant(
I * 16 / NumOuts,
DL, StackPtr.getValueType()));
18985 VT, Chain, Ptr, MPI, LoadVT,
Align(4));
18994 switch (
N->getOpcode()) {
19054 case ARMISD::BRCOND:
19058 case ARMISD::CSINC:
19059 case ARMISD::CSINV:
19060 case ARMISD::CSNEG:
19073 case ARMISD::PREDICATE_CAST:
19075 case ARMISD::VECTOR_REG_CAST:
19086 case ARMISD::VADDVs:
19087 case ARMISD::VADDVu:
19088 case ARMISD::VADDLVs:
19089 case ARMISD::VADDLVu:
19090 case ARMISD::VADDLVAs:
19091 case ARMISD::VADDLVAu:
19092 case ARMISD::VMLAVs:
19093 case ARMISD::VMLAVu:
19094 case ARMISD::VMLALVs:
19095 case ARMISD::VMLALVu:
19096 case ARMISD::VMLALVAs:
19097 case ARMISD::VMLALVAu:
19099 case ARMISD::VMOVN:
19101 case ARMISD::VQMOVNs:
19102 case ARMISD::VQMOVNu:
19104 case ARMISD::VQDMULH:
19110 case ARMISD::SMULWB: {
19111 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
19117 case ARMISD::SMULWT: {
19118 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
19124 case ARMISD::SMLALBB:
19125 case ARMISD::QADD16b:
19126 case ARMISD::QSUB16b:
19127 case ARMISD::UQADD16b:
19128 case ARMISD::UQSUB16b: {
19129 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
19136 case ARMISD::SMLALBT: {
19137 unsigned LowWidth =
N->getOperand(0).getValueType().getSizeInBits();
19139 unsigned HighWidth =
N->getOperand(1).getValueType().getSizeInBits();
19146 case ARMISD::SMLALTB: {
19147 unsigned HighWidth =
N->getOperand(0).getValueType().getSizeInBits();
19149 unsigned LowWidth =
N->getOperand(1).getValueType().getSizeInBits();
19156 case ARMISD::SMLALTT: {
19157 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
19164 case ARMISD::QADD8b:
19165 case ARMISD::QSUB8b:
19166 case ARMISD::UQADD8b:
19167 case ARMISD::UQSUB8b: {
19168 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
19176 if (
N->getOperand(1) ==
N->getOperand(2))
19177 return N->getOperand(1);
19181 switch (
N->getConstantOperandVal(1)) {
19182 case Intrinsic::arm_neon_vld1:
19183 case Intrinsic::arm_neon_vld1x2:
19184 case Intrinsic::arm_neon_vld1x3:
19185 case Intrinsic::arm_neon_vld1x4:
19186 case Intrinsic::arm_neon_vld2:
19187 case Intrinsic::arm_neon_vld3:
19188 case Intrinsic::arm_neon_vld4:
19189 case Intrinsic::arm_neon_vld2lane:
19190 case Intrinsic::arm_neon_vld3lane:
19191 case Intrinsic::arm_neon_vld4lane:
19192 case Intrinsic::arm_neon_vld2dup:
19193 case Intrinsic::arm_neon_vld3dup:
19194 case Intrinsic::arm_neon_vld4dup:
19195 case Intrinsic::arm_neon_vst1:
19196 case Intrinsic::arm_neon_vst1x2:
19197 case Intrinsic::arm_neon_vst1x3:
19198 case Intrinsic::arm_neon_vst1x4:
19199 case Intrinsic::arm_neon_vst2:
19200 case Intrinsic::arm_neon_vst3:
19201 case Intrinsic::arm_neon_vst4:
19202 case Intrinsic::arm_neon_vst2lane:
19203 case Intrinsic::arm_neon_vst3lane:
19204 case Intrinsic::arm_neon_vst4lane:
19206 case Intrinsic::arm_mve_vld2q:
19207 case Intrinsic::arm_mve_vld4q:
19208 case Intrinsic::arm_mve_vst2q:
19209 case Intrinsic::arm_mve_vst4q:
19226 unsigned *
Fast)
const {
19232 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19235 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19237 if (AllowsUnaligned) {
19239 *
Fast = Subtarget->hasV7Ops();
19244 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19248 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19255 if (!Subtarget->hasMVEIntegerOps())
19259 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19260 Ty == MVT::v2i1)) {
19268 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19284 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19285 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19286 Ty == MVT::v2f64) {
19297 const AttributeList &FuncAttributes)
const {
19299 if ((
Op.isMemcpy() ||
Op.isZeroMemset()) && Subtarget->hasNEON() &&
19300 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19302 if (
Op.size() >= 16 &&
19308 }
else if (
Op.size() >= 8 &&
19325 if (!SrcTy->isIntegerTy() || !DstTy->
isIntegerTy())
19327 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19329 return (SrcBits == 64 && DestBits == 32);
19338 return (SrcBits == 64 && DestBits == 32);
19374 return Subtarget->hasFullFP16();
19381 if (!Subtarget->hasMVEIntegerOps())
19400 if (Ld->isExpandingLoad())
19404 if (Subtarget->hasMVEIntegerOps())
19417 U->getOpcode() ==
ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19449bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF,
19451 if (Subtarget->useSoftFloat())
19460 return Subtarget->hasMVEFloatOps();
19478 unsigned Scale = 1;
19495 if ((V & (Scale - 1)) != 0)
19504 if (VT.
isVector() && Subtarget->hasNEON())
19507 !Subtarget->hasMVEFloatOps())
19510 bool IsNeg =
false;
19516 unsigned NumBytes = std::max((
unsigned)VT.
getSizeInBits() / 8, 1U);
19519 if (VT.
isVector() && Subtarget->hasMVEIntegerOps()) {
19535 if (VT.
isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19541 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19571 default:
return false;
19590 int Scale = AM.
Scale;
19595 default:
return false;
19603 Scale = Scale & ~1;
19604 return Scale == 2 || Scale == 4 || Scale == 8;
19621 if (Scale & 1)
return false;
19628 const int Scale = AM.
Scale;
19638 return (Scale == 1) || (!AM.
HasBaseReg && Scale == 2);
19654 switch (AM.
Scale) {
19665 if (Subtarget->isThumb1Only())
19668 if (Subtarget->isThumb2())
19671 int Scale = AM.
Scale;
19673 default:
return false;
19677 if (Scale < 0) Scale = -Scale;
19685 if (Scale == 1 || (AM.
HasBaseReg && Scale == -1))
19698 if (Scale & 1)
return false;
19711 if (!Subtarget->isThumb())
19714 if (Subtarget->isThumb2())
19718 return Imm >= 0 && Imm <= 255;
19728 if (!Subtarget->isThumb())
19730 if (Subtarget->isThumb2())
19733 return AbsImm <= 255;
19768 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19772 int RHSC = (int)
RHS->getZExtValue();
19773 if (RHSC < 0 && RHSC > -256) {
19783 }
else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19786 int RHSC = (int)
RHS->getZExtValue();
19787 if (RHSC < 0 && RHSC > -0x1000) {
19829 int RHSC = (int)
RHS->getZExtValue();
19830 if (RHSC < 0 && RHSC > -0x100) {
19835 }
else if (RHSC > 0 && RHSC < 0x100) {
19846 bool isSEXTLoad,
bool IsMasked,
bool isLE,
19857 bool CanChangeType = isLE && !IsMasked;
19860 int RHSC = (int)
RHS->getZExtValue();
19862 auto IsInRange = [&](
int RHSC,
int Limit,
int Scale) {
19863 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19868 }
else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19879 if (VT == MVT::v4i16) {
19880 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19882 }
else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19883 if (IsInRange(RHSC, 0x80, 1))
19885 }
else if (Alignment >= 4 &&
19886 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19887 IsInRange(RHSC, 0x80, 4))
19889 else if (Alignment >= 2 &&
19890 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19891 IsInRange(RHSC, 0x80, 2))
19893 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19906 if (Subtarget->isThumb1Only())
19913 bool isSEXTLoad =
false;
19914 bool IsMasked =
false;
19916 Ptr = LD->getBasePtr();
19917 VT = LD->getMemoryVT();
19918 Alignment = LD->getAlign();
19919 AS = LD->getAddressSpace();
19922 Ptr = ST->getBasePtr();
19923 VT = ST->getMemoryVT();
19924 Alignment = ST->getAlign();
19925 AS = ST->getAddressSpace();
19927 Ptr = LD->getBasePtr();
19928 VT = LD->getMemoryVT();
19929 Alignment = LD->getAlign();
19930 AS = LD->getAddressSpace();
19934 Ptr = ST->getBasePtr();
19935 VT = ST->getMemoryVT();
19936 Alignment = ST->getAlign();
19937 AS = ST->getAddressSpace();
19952 bool isLegal =
false;
19954 isLegal = Subtarget->hasMVEIntegerOps() &&
19956 Ptr.
getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19957 Subtarget->isLittle(),
Base,
Offset, isInc, DAG);
19959 if (Subtarget->isThumb2())
19984 bool isSEXTLoad =
false, isNonExt;
19985 bool IsMasked =
false;
19987 VT = LD->getMemoryVT();
19988 Ptr = LD->getBasePtr();
19989 Alignment = LD->getAlign();
19993 VT = ST->getMemoryVT();
19994 Ptr = ST->getBasePtr();
19995 Alignment = ST->getAlign();
19996 isNonExt = !ST->isTruncatingStore();
19998 VT = LD->getMemoryVT();
19999 Ptr = LD->getBasePtr();
20000 Alignment = LD->getAlign();
20005 VT = ST->getMemoryVT();
20006 Ptr = ST->getBasePtr();
20007 Alignment = ST->getAlign();
20008 isNonExt = !ST->isTruncatingStore();
20013 if (Subtarget->isThumb1Only()) {
20016 assert(
Op->getValueType(0) == MVT::i32 &&
"Non-i32 post-inc op?!");
20017 if (
Op->getOpcode() !=
ISD::ADD || !isNonExt)
20020 if (!RHS || RHS->getZExtValue() != 4)
20022 if (Alignment <
Align(4))
20026 Base =
Op->getOperand(0);
20032 bool isLegal =
false;
20034 isLegal = Subtarget->hasMVEIntegerOps() &&
20039 if (Subtarget->isThumb2())
20053 !Subtarget->isThumb2())
20067 const APInt &DemandedElts,
20069 unsigned Depth)
const {
20072 switch (
Op.getOpcode()) {
20079 if (
Op.getResNo() == 0) {
20090 case ARMISD::CMOV: {
20105 case Intrinsic::arm_ldaex:
20106 case Intrinsic::arm_ldrex: {
20114 case ARMISD::BFI: {
20121 const APInt &Mask =
Op.getConstantOperandAPInt(2);
20122 Known.
Zero &= Mask;
20126 case ARMISD::VGETLANEs:
20127 case ARMISD::VGETLANEu: {
20128 const SDValue &SrcSV =
Op.getOperand(0);
20134 "VGETLANE index out of bounds");
20139 EVT VT =
Op.getValueType();
20145 if (
Op.getOpcode() == ARMISD::VGETLANEs)
20146 Known = Known.
sext(DstSz);
20148 Known = Known.
zext(DstSz);
20153 case ARMISD::VMOVrh: {
20156 Known = KnownOp.
zext(32);
20159 case ARMISD::CSINC:
20160 case ARMISD::CSINV:
20161 case ARMISD::CSNEG: {
20169 if (
Op.getOpcode() == ARMISD::CSINC)
20172 else if (
Op.getOpcode() == ARMISD::CSINV)
20174 else if (
Op.getOpcode() == ARMISD::CSNEG)
20181 case ARMISD::VORRIMM:
20182 case ARMISD::VBICIMM: {
20183 unsigned Encoded =
Op.getConstantOperandVal(1);
20184 unsigned DecEltBits = 0;
20187 unsigned EltBits =
Op.getScalarValueSizeInBits();
20188 if (EltBits != DecEltBits) {
20197 bool IsVORR =
Op.getOpcode() == ARMISD::VORRIMM;
20198 APInt Imm(DecEltBits, DecodedVal);
20200 Known.
One = IsVORR ? (KnownLHS.
One | Imm) : (KnownLHS.
One & ~Imm);
20201 Known.
Zero = IsVORR ? (KnownLHS.
Zero & ~Imm) : (KnownLHS.
Zero | Imm);
20209 if (!Subtarget->isThumb())
20226 if (Imm == 0 || Imm == ~0U)
20229 unsigned Opc =
Op.getOpcode();
20231 EVT VT =
Op.getValueType();
20233 unsigned ShrunkImm = Imm & Demanded;
20234 unsigned ExpandedImm = Imm | ~Demanded;
20236 auto IsLegalImm = [ShrunkImm, ExpandedImm](
unsigned CandidateImm) ->
bool {
20237 return (ShrunkImm & CandidateImm) == ShrunkImm &&
20238 (~ExpandedImm & CandidateImm) == 0;
20240 auto UseImm = [Imm,
Opc,
Op, VT, &TLO](
unsigned NewImm) ->
bool {
20252 if (ShrunkImm == 0) {
20253 ++NumOptimizedImms;
20254 return UseImm(ShrunkImm);
20260 if (ExpandedImm == ~0U) {
20261 ++NumOptimizedImms;
20262 return UseImm(ExpandedImm);
20270 if (IsLegalImm(0xFF)) {
20271 ++NumOptimizedImms;
20272 return UseImm(0xFF);
20275 if (IsLegalImm(0xFFFF)) {
20276 ++NumOptimizedImms;
20277 return UseImm(0xFFFF);
20291 ++NumOptimizedImms;
20292 return UseImm(ShrunkImm);
20300 if ((~ExpandedImm) < 256) {
20301 ++NumOptimizedImms;
20302 return UseImm(ExpandedImm);
20308 !Subtarget->hasV6Ops()) {
20309 ++NumOptimizedImms;
20310 return UseImm(ExpandedImm);
20329 EVT VT =
Op.getValueType();
20344 switch (
Op.getOpcode()) {
20355 unsigned Imm =
C->getZExtValue();
20362 unsigned Depth)
const {
20363 unsigned Opc =
Op.getOpcode();
20367 case ARMISD::LSRL: {
20371 if (
Op.getResNo() == 0 && !
Op->hasAnyUseOfValue(1) &&
20373 unsigned ShAmt =
Op->getConstantOperandVal(2);
20383 case ARMISD::VBICIMM: {
20385 unsigned ModImm =
Op.getConstantOperandVal(1);
20386 unsigned EltBits = 0;
20388 if ((OriginalDemandedBits & Mask) == 0)
20394 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
20409 if (!Subtarget->hasVFP2Base())
20413 if (ConstraintVT.
isVector() && Subtarget->hasNEON() &&
20425 unsigned S = Constraint.
size();
20427 switch (Constraint[0]) {
20439 }
else if (S == 2) {
20440 switch (Constraint[0]) {
20457 Value *CallOperandVal =
info.CallOperandVal;
20460 if (!CallOperandVal)
20464 switch (*constraint) {
20470 if (Subtarget->isThumb())
20485 if (PR == 0 || VT == MVT::Other)
20487 if (ARM::SPRRegClass.
contains(PR))
20488 return VT != MVT::f32 && VT != MVT::f16 && VT != MVT::i32;
20489 if (ARM::DPRRegClass.
contains(PR))
// Shorthand for a (register number, register class) pair. The inline-asm
// constraint code that follows builds these with 0U as the first member when
// it only selects a register class (e.g. RCPair(0U, &ARM::GPRRegClass)).
// NOTE(review): presumably 0 means "no specific physical register" - confirm
// against the caller's contract.
20494using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20498 switch (Constraint.
size()) {
20501 switch (Constraint[0]) {
20503 if (Subtarget->isThumb())
20504 return RCPair(0U, &ARM::tGPRRegClass);
20505 return RCPair(0U, &ARM::GPRRegClass);
20507 if (Subtarget->isThumb())
20508 return RCPair(0U, &ARM::hGPRRegClass);
20511 if (Subtarget->isThumb1Only())
20512 return RCPair(0U, &ARM::tGPRRegClass);
20513 return RCPair(0U, &ARM::GPRRegClass);
20515 if (VT == MVT::Other)
20517 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20518 return RCPair(0U, &ARM::SPRRegClass);
20520 return RCPair(0U, &ARM::DPRRegClass);
20522 return RCPair(0U, &ARM::QPRRegClass);
20525 if (VT == MVT::Other)
20527 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20528 return RCPair(0U, &ARM::SPR_8RegClass);
20530 return RCPair(0U, &ARM::DPR_8RegClass);
20532 return RCPair(0U, &ARM::QPR_8RegClass);
20535 if (VT == MVT::Other)
20537 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20538 return RCPair(0U, &ARM::SPRRegClass);
20540 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20542 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20548 if (Constraint[0] ==
'T') {
20549 switch (Constraint[1]) {
20553 return RCPair(0U, &ARM::tGPREvenRegClass);
20555 return RCPair(0U, &ARM::tGPROddRegClass);
20564 if (
StringRef(
"{cc}").equals_insensitive(Constraint))
20565 return std::make_pair(
unsigned(ARM::CPSR), &ARM::CCRRegClass);
20568 if (
StringRef(
"{r14}").equals_insensitive(Constraint))
20569 return std::make_pair(
unsigned(ARM::LR),
getRegClassFor(MVT::i32));
20573 return {0,
nullptr};
20581 std::vector<SDValue> &
Ops,
20586 if (Constraint.
size() != 1)
20589 char ConstraintLetter = Constraint[0];
20590 switch (ConstraintLetter) {
20593 case 'I':
case 'J':
case 'K':
case 'L':
20594 case 'M':
case 'N':
case 'O':
20599 int64_t CVal64 =
C->getSExtValue();
20600 int CVal = (int) CVal64;
20603 if (CVal != CVal64)
20606 switch (ConstraintLetter) {
20610 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20611 if (CVal >= 0 && CVal <= 65535)
20615 if (Subtarget->isThumb1Only()) {
20618 if (CVal >= 0 && CVal <= 255)
20620 }
else if (Subtarget->isThumb2()) {
20634 if (Subtarget->isThumb1Only()) {
20639 if (CVal >= -255 && CVal <= -1)
20645 if (CVal >= -4095 && CVal <= 4095)
20651 if (Subtarget->isThumb1Only()) {
20658 }
else if (Subtarget->isThumb2()) {
20678 if (Subtarget->isThumb1Only()) {
20681 if (CVal >= -7 && CVal < 7)
20683 }
else if (Subtarget->isThumb2()) {
20703 if (Subtarget->isThumb1Only()) {
20706 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20712 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20718 if (Subtarget->isThumb1Only()) {
20720 if (CVal >= 0 && CVal <= 31)
20726 if (Subtarget->isThumb1Only()) {
20729 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20738 if (Result.getNode()) {
20739 Ops.push_back(Result);
20749 "Unhandled Opcode in getDivRemLibcall");
20755 case MVT::i8: LC =
isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
20756 case MVT::i16: LC =
isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
20757 case MVT::i32: LC =
isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
20758 case MVT::i64: LC =
isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
20767 "Unhandled Opcode in getDivRemArgList");
20771 for (
unsigned i = 0, e =
N->getNumOperands(); i != e; ++i) {
20772 EVT ArgVT =
N->getOperand(i).getValueType();
20777 Args.push_back(Entry);
20785 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20786 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20787 Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
20788 "Register-based DivRem lowering only");
20789 unsigned Opcode =
Op->getOpcode();
20791 "Invalid opcode for Div/Rem lowering");
20793 EVT VT =
Op->getValueType(0);
20815 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20816 : Subtarget->hasDivideInARMMode();
20817 if (hasDivide &&
Op->getValueType(0).isSimple() &&
20818 Op->getSimpleValueType(0) == MVT::i32) {
20820 const SDValue Dividend =
Op->getOperand(0);
20821 const SDValue Divisor =
Op->getOperand(1);
20822 SDValue Div = DAG.
getNode(DivOpcode, dl, VT, Dividend, Divisor);
20826 SDValue Values[2] = {Div, Rem};
20845 if (
getTM().getTargetTriple().isOSWindows())
20848 TargetLowering::CallLoweringInfo CLI(DAG);
20852 Callee, std::move(Args))
20857 std::pair<SDValue, SDValue> CallInfo =
LowerCallTo(CLI);
20858 return CallInfo.first;
20864 EVT VT =
N->getValueType(0);
20870 Result[0], Result[1]);
20874 std::vector<Type*> RetTyParams;
20875 Type *RetTyElement;
20885 RetTyParams.push_back(RetTyElement);
20886 RetTyParams.push_back(RetTyElement);
20901 if (
getTM().getTargetTriple().isOSWindows())
20908 Callee, std::move(Args))
20912 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
20915 SDNode *ResNode = CallResult.first.getNode();
20922 assert(
getTM().getTargetTriple().isOSWindows() &&
20923 "unsupported target platform");
20931 "no-stack-arg-probe")) {
20935 Chain =
SP.getValue(1);
20952 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
20953 Chain = DAG.
getNode(ARMISD::WIN__CHKSTK,
DL, NodeTys, Chain, Glue);
20963 bool IsStrict =
Op->isStrictFPOpcode();
20964 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
20965 const unsigned DstSz =
Op.getValueType().getSizeInBits();
20967 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20968 "Unexpected type for custom-lowering FP_EXTEND");
20970 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20971 "With both FP DP and 16, any FP conversion is legal!");
20973 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20974 "With FP16, 16 to 32 conversion is legal!");
20977 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20982 Loc,
Op.getValueType(), SrcVal);
20997 for (
unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20998 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20999 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
21000 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
21004 {DstVT, MVT::Other}, {Chain, SrcVal});
21011 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
21012 "Unexpected type for custom-lowering FP_EXTEND");
21013 std::tie(SrcVal, Chain) =
makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
21018 return IsStrict ? DAG.
getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
21022 bool IsStrict =
Op->isStrictFPOpcode();
21024 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
21026 EVT DstVT =
Op.getValueType();
21027 const unsigned DstSz =
Op.getValueType().getSizeInBits();
21030 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
21031 "Unexpected type for custom-lowering FP_ROUND");
21033 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
21034 "With both FP DP and 16, any FP conversion is legal!");
21039 if (SrcSz == 32 && Subtarget->hasFP16())
21044 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
21045 "Unexpected type for custom-lowering FP_ROUND");
21049 std::tie(Result, Chain) =
makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
21061 if (v == 0xffffffff)
21073 bool ForCodeSize)
const {
21074 if (!Subtarget->hasVFP3Base())
21076 if (VT == MVT::f16 && Subtarget->hasFullFP16())