76#define DEBUG_TYPE "arm-low-overhead-loops"
77#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
81 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
86 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
91 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
95 return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
99 return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
114 if (
MI.isDebugInstr())
123 class PostOrderLoopTraversal {
131 :
ML(
ML), MLI(MLI) { }
147 if (!
ML.contains(Succ))
156 ML.getExitBlocks(ExitBlocks);
160 Search(
ML.getHeader());
170 if (
auto *Preheader =
ML.getLoopPreheader())
171 GetPredecessor(Preheader);
173 GetPredecessor(Preheader);
177 struct PredicatedMI {
183 assert(
I &&
"Instruction must not be null!");
194 friend struct LowOverheadLoop;
201 std::unique_ptr<PredicatedMI>> PredicatedInsts;
204 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
205 &&
"Can't begin VPT without predicate");
210 PredicatedInsts.emplace(
211 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
214 static void reset() {
216 PredicatedInsts.clear();
217 CurrentPredicates.
clear();
222 PredicatedInsts.emplace(
223 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
233 CurrentPredicates.
clear();
240 static bool hasUniformPredicate(VPTState &Block) {
241 return getDivergent(Block) ==
nullptr;
248 for (
unsigned i = 1; i < Insts.
size(); ++i) {
257 static bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
259 if (Exclusive && Predicates.
size() != 1)
265 static bool isEntryPredicatedOnVCTP(VPTState &Block,
266 bool Exclusive =
false) {
268 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
274 static bool hasImplicitlyValidVPT(VPTState &Block,
279 "Expected VPT block to begin with VPT/VPST");
286 return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
299 for (
auto *Def : Defs)
307 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
308 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
309 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
316 for (
auto &Block : Blocks) {
317 if (isEntryPredicatedOnVCTP(Block,
false) ||
318 hasImplicitlyValidVPT(Block,
RDA))
325 "Expected VPT block to start with a VPST or VPT!");
326 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
330 for (
auto *
MI : Insts) {
340 if (!isPredicatedOnVCTP(
MI)) {
354 assert(Insts.size() <= 5 &&
"Too many instructions in VPT block!");
357 bool containsVCTP()
const {
361 unsigned size()
const {
return Insts.size(); }
365 struct LowOverheadLoop {
386 bool CannotTailPredicate =
false;
393 MF =
ML.getHeader()->getParent();
394 if (
auto *
MBB =
ML.getLoopPreheader())
407 CannotTailPredicate = !ValidateMVEInst(
MI);
410 bool IsTailPredicationLegal()
const {
413 return !Revert && FoundAllComponents() && !VCTPs.
empty() &&
414 !CannotTailPredicate &&
ML.getNumBlocks() == 1;
424 bool ValidateTailPredicate();
428 bool ValidateLiveOuts();
439 bool FoundAllComponents()
const {
440 return Start && Dec && End;
444 return VPTState::Blocks;
450 if (IsTailPredicationLegal())
451 return TPNumElements;
452 return Start->getOperand(1);
455 unsigned getStartOpcode()
const {
457 if (!IsTailPredicationLegal())
458 return IsDo ? ARM::t2DLS : ARM::t2WLS;
464 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
465 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
466 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *End;
467 if (!VCTPs.
empty()) {
468 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
469 for (
auto *
MI : VCTPs)
472 if (!FoundAllComponents())
473 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
474 else if (!(Start && Dec && End))
475 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
486 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
504 MachineFunctionProperties::Property::NoVRegs).
set(
505 MachineFunctionProperties::Property::TracksLiveness);
515 bool RevertNonLoops();
526 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
530 void Expand(LowOverheadLoop &LoLoop);
532 void IterationCountDCE(LowOverheadLoop &LoLoop);
536char ARMLowOverheadLoops::ID = 0;
541 std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
554 for (
auto *Dead : Killed)
555 BasicBlocks.
insert(Dead->getParent());
558 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
559 for (
auto *
MBB : BasicBlocks) {
560 for (
auto &
IT : *
MBB) {
561 if (
IT.getOpcode() != ARM::t2IT)
572 for (
auto *Dead : Killed) {
573 if (
MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
576 auto &CurrentBlock = ITBlocks[
IT];
577 CurrentBlock.erase(Dead);
578 if (CurrentBlock.empty())
584 if (!ModifiedITs.
empty())
586 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
597 <<
" - can also remove:\n";
603 if (WontCorruptITs(Killed,
RDA)) {
606 dbgs() <<
" - " << *Dead);
613bool LowOverheadLoop::ValidateTailPredicate() {
614 if (!IsTailPredicationLegal()) {
616 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
617 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
621 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
622 assert(
ML.getBlocks().size() == 1 &&
623 "Shouldn't be processing a loop with more than one block");
626 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
630 if (!VPTState::isValid(
RDA)) {
635 if (!ValidateLiveOuts()) {
645 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
646 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
647 TPNumElements = Start->getOperand(2);
648 StartInsertPt = Start;
649 StartInsertBB = Start->getParent();
658 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
666 if (StartInsertPt != StartInsertBB->
end() &&
671 ElemDef->removeFromParent();
672 StartInsertBB->
insert(StartInsertPt, ElemDef);
674 <<
"ARM Loops: Moved element count def: " << *ElemDef);
676 StartInsertPt->removeFromParent();
679 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
689 TPNumElements = Operand;
690 NumElements = TPNumElements.
getReg();
693 <<
"ARM Loops: Unable to move element count to loop "
694 <<
"start instruction.\n");
721 while (
MBB &&
MBB != StartInsertBB) {
722 if (CannotProvideElements(
MBB, NumElements)) {
723 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
740 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
751 if (InstrVecSize > VCTPVecSize) {
752 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
753 <<
"VecSize:\n" << *
MI);
779 if (TryRemove(Def,
RDA, ElementChain,
Ignore)) {
780 bool FoundSub =
false;
782 for (
auto *
MI : ElementChain) {
787 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
788 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
794 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
799 ToRemove.insert(ElementChain.begin(), ElementChain.end());
806 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
807 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
808 Preheader && !Preheader->
empty() &&
861 switch (
MI.getOpcode()) {
870 case ARM::MVE_VCLZs8:
871 case ARM::MVE_VCLZs16:
872 case ARM::MVE_VCLZs32:
886 InstSet &FalseLanesZero) {
898 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
899 Def->getOperand(1).getImm() == 0;
903 for (
auto &MO :
MI.operands()) {
921 for (
auto *Def : Defs) {
922 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
924 if (MO.
isUse() && isPredicated)
933bool LowOverheadLoop::ValidateLiveOuts() {
963 for (
auto &
MI : *Header) {
971 bool retainsOrReduces =
978 else if (
MI.getNumDefs() == 0)
980 else if (!isPredicated && retainsOrReduces) {
981 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
983 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
988 dbgs() <<
" Predicated:\n";
989 for (
auto *
I : Predicated)
991 dbgs() <<
" FalseLanesZero:\n";
992 for (
auto *
I : FalseLanesZero)
994 dbgs() <<
" FalseLanesUnknown:\n";
995 for (
auto *
I : FalseLanesUnknown)
1004 if (
Use !=
MI && !Predicated.count(
Use))
1017 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
1018 for (
auto &MO :
MI->operands()) {
1021 if (!HasPredicatedUsers(
MI, MO, Predicated)) {
1023 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1030 Predicated.insert(
MI);
1035 ML.getExitBlocks(ExitBlocks);
1036 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1037 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1042 if (RegMask.PhysReg == ARM::VPR) {
1048 if (QPRs->
contains(RegMask.PhysReg))
1061 while (!Worklist.empty()) {
1063 if (
MI->getOpcode() == ARM::MQPRCopy) {
1068 Worklist.push_back(CopySrc);
1088 ? End->getOperand(1).getMBB()
1089 : End->getOperand(2).getMBB();
1092 if (TgtBB !=
ML.getHeader()) {
1093 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1099 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1100 !BBUtils->isBBInRange(End,
ML.getHeader(), 4094)) {
1107 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1108 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1109 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1117 StartInsertBB = Start->getParent();
1121 Revert = !ValidateRanges(Start, End, BBUtils,
ML);
1122 CannotTailPredicate = !ValidateTailPredicate();
1127 if (VCTPs.
empty()) {
1137 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1138 "definition from the main VCTP");
1150 if (
const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1151 return FS->getFrameIndex();
1158 switch (
I->getOpcode()) {
1159 case ARM::MVE_VSTRWU32:
1160 case ARM::MVE_VLDRWU32: {
1161 return I->getOperand(1).getReg() == ARM::SP &&
1162 I->memoperands().size() == 1 &&
1163 GetFrameIndex(
I->memoperands().front()) >= 0;
1172 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1178 if (
MI->memoperands().size() == 0)
1180 int FI = GetFrameIndex(
MI->memoperands().front());
1182 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1183 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1187 ML->getExitBlocks(Frontier);
1190 while (
Idx < Frontier.
size()) {
1192 bool LookAtSuccessors =
true;
1193 for (
auto &
I : *BB) {
1194 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1196 if (GetFrameIndex(
I.memoperands().front()) != FI)
1200 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1201 LookAtSuccessors =
false;
1206 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1210 if (LookAtSuccessors) {
1224 if (CannotTailPredicate)
1230 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1231 MI->getOpcode() == ARM::MVE_VPNOT) {
1251 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1258 VPTState::addInst(
MI);
1260 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1269 bool RequiresExplicitPredication =
1272 if (
MI->getOpcode() == ARM::MQPRCopy)
1275 DoubleWidthResultInstrs.insert(
MI);
1280 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1295 VPTState::resetPredicate(
MI);
1297 VPTState::addPredicate(
MI);
1303 VPTState::CreateVPTBlock(
MI);
1316 MLI = &getAnalysis<MachineLoopInfo>();
1317 RDA = &getAnalysis<ReachingDefAnalysis>();
1318 MF->
getProperties().
set(MachineFunctionProperties::Property::TracksLiveness);
1321 TRI =
ST.getRegisterInfo();
1323 BBUtils->computeAllBlockSizes();
1324 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1326 bool Changed =
false;
1327 for (
auto *
ML : *MLI) {
1328 if (
ML->isOutermost())
1329 Changed |= ProcessLoop(
ML);
1331 Changed |= RevertNonLoops();
1337 bool Changed =
false;
1341 Changed |= ProcessLoop(L);
1344 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1345 if (
auto *Preheader =
ML->getLoopPreheader())
1347 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1349 for (
auto *
MBB :
ML->getBlocks())
1357 for (
auto &
MI : *
MBB) {
1366 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDA, *
TRI, *
TII);
1370 if (LoLoop.Preheader)
1371 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1379 for (
auto &
MI : *
MBB) {
1380 if (
MI.isDebugValue())
1382 else if (
MI.getOpcode() == ARM::t2LoopDec)
1384 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1386 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1387 LoLoop.End = LoLoop.Dec = &
MI;
1390 else if (
MI.getDesc().isCall()) {
1394 LoLoop.Revert =
true;
1399 LoLoop.AnalyseMVEInst(&
MI);
1405 if (!LoLoop.FoundAllComponents()) {
1406 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1410 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1411 "Expected t2WhileLoopStart to be removed before regalloc!");
1416 if (LoLoop.Dec != LoLoop.End) {
1419 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1421 LoLoop.Revert =
true;
1424 LoLoop.Validate(BBUtils.get());
1436 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1437 ARM::tBcc : ARM::t2Bcc;
1452 if (
I->getOpcode() == ARM::t2LoopEnd) {
1467void ARMLowOverheadLoops::RevertLoopEnd(
MachineInstr *
MI,
bool SkipCmp)
const {
1471 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1472 ARM::tBcc : ARM::t2Bcc;
1478void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1480 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1486 MIB.
add(
MI->getOperand(1));
1489 MIB.
addReg(ARM::NoRegister);
1495 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1499 MIB.
add(
MI->getOperand(2));
1503 MI->eraseFromParent();
1530void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1531 if (!LoLoop.IsTailPredicationLegal())
1534 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1538 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1545 if (!TryRemove(Def, *
RDA, LoLoop.ToRemove, Killed))
1546 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1549MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1553 IterationCountDCE(LoLoop);
1558 unsigned Opc = LoLoop.getStartOpcode();
1564 Count.
getReg() == ARM::LR) {
1565 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1569 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1580 LoLoop.ToRemove.insert(Start);
1584void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1586 if (
MI->isDebugInstr())
1590 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1592 "Expected Then predicate!");
1594 MI->getOperand(PIdx + 1).setReg(0);
1597 for (
auto &Block : LoLoop.getVPTBlocks()) {
1601 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1604 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1613 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1614 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1615 LoLoop.ToRemove.insert(TheVCMP);
1619 if (VPTState::isEntryPredicatedOnVCTP(Block,
true)) {
1621 if (VPTState::hasUniformPredicate(Block)) {
1627 for (
unsigned i = 1; i < Insts.
size(); ++i)
1628 RemovePredicate(Insts[i]);
1637 MachineInstr *Divergent = VPTState::getDivergent(Block);
1640 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1643 bool DivergentNextIsPredicated =
1644 DivergentNext !=
MBB->
end() &&
1649 RemovePredicate(&*
I);
1656 if (DivergentNextIsPredicated) {
1668 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1672 ReplaceVCMPWithVPT(VCMP, VCMP);
1677 LoLoop.ToRemove.insert(VPST);
1678 }
else if (
Block.containsVCTP()) {
1682 if (
Block.size() == 2) {
1684 "Found a VPST in an otherwise empty vpt block");
1685 LoLoop.ToRemove.insert(VPST);
1687 LoLoop.BlockMasksToRecompute.insert(VPST);
1688 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1695 "The instruction after a VPST must be predicated");
1699 !LoLoop.ToRemove.contains(VprDef)) {
1710 ReplaceVCMPWithVPT(VCMP, VPST);
1712 LoLoop.ToRemove.insert(VPST);
1718 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1721void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1724 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1727 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1728 ARM::MVE_LETP : ARM::t2LEUpdate;
1732 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1733 MIB.
add(End->getOperand(Off + 0));
1734 MIB.
add(End->getOperand(Off + 1));
1736 LoLoop.ToRemove.insert(LoLoop.Dec);
1737 LoLoop.ToRemove.insert(End);
1752 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1761 for (
auto *
MI : VMOVCopies) {
1763 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1768 ARM::D0 + (Dst - ARM::Q0) * 2)
1769 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1774 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1775 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1779 MI->eraseFromParent();
1783 if (LoLoop.Revert) {
1785 RevertWhile(LoLoop.Start);
1787 RevertDo(LoLoop.Start);
1788 if (LoLoop.Dec == LoLoop.End)
1789 RevertLoopEndDec(LoLoop.End);
1793 ExpandVMOVCopies(LoLoop.VMOVCopies);
1794 LoLoop.Start = ExpandLoopStart(LoLoop);
1796 RemoveDeadBranch(LoLoop.Start);
1797 LoLoop.End = ExpandLoopEnd(LoLoop);
1798 RemoveDeadBranch(LoLoop.End);
1799 if (LoLoop.IsTailPredicationLegal())
1800 ConvertVPTBlocks(LoLoop);
1801 for (
auto *
I : LoLoop.ToRemove) {
1803 I->eraseFromParent();
1805 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1806 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1812 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1815 for (
auto *
MBB : PostOrder) {
1829bool ARMLowOverheadLoops::RevertNonLoops() {
1830 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1831 bool Changed =
false;
1833 for (
auto &
MBB : *MF) {
1840 for (
auto &
I :
MBB) {
1843 else if (
I.getOpcode() == ARM::t2LoopDec)
1845 else if (
I.getOpcode() == ARM::t2LoopEnd)
1847 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1849 else if (
I.getOpcode() == ARM::MQPRCopy)
1859 for (
auto *Start : Starts) {
1865 for (
auto *Dec : Decs)
1868 for (
auto *End : Ends)
1870 for (
auto *End : EndDecs)
1871 RevertLoopEndDec(End);
1872 for (
auto *
MI : MQPRCopies) {
1874 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1877 MI->getOperand(0).getReg())
1878 .
add(
MI->getOperand(1))
1879 .
add(
MI->getOperand(1));
1881 MI->eraseFromParent();
1888 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
static bool isDomainMVE(MachineInstr *MI)
SmallPtrSet< MachineInstr *, 2 > Uses
static bool isVectorPredicated(MachineInstr *MI)
ReachingDefAnalysis & RDA
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
ReachingDefAnalysis InstSet & ToRemove
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefAnalysis InstSet InstSet & Ignore
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
void reset()
Re-run the analysis.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void clear()
Completely clear the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Pair of physical register and lane mask.