76#define DEBUG_TYPE "arm-low-overhead-loops"
77#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
81 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
86 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
91 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
95 return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
99 return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
114 if (
MI.isDebugInstr())
123 class PostOrderLoopTraversal {
131 :
ML(
ML), MLI(MLI) { }
147 if (!
ML.contains(Succ))
156 ML.getExitBlocks(ExitBlocks);
160 Search(
ML.getHeader());
170 if (
auto *Preheader =
ML.getLoopPreheader())
171 GetPredecessor(Preheader);
173 GetPredecessor(Preheader);
177 struct PredicatedMI {
183 assert(
I &&
"Instruction must not be null!");
194 friend struct LowOverheadLoop;
201 std::unique_ptr<PredicatedMI>> PredicatedInsts;
204 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
205 &&
"Can't begin VPT without predicate");
210 PredicatedInsts.emplace(
211 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
214 static void reset() {
216 PredicatedInsts.clear();
217 CurrentPredicates.
clear();
222 PredicatedInsts.emplace(
223 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
233 CurrentPredicates.
clear();
240 static bool hasUniformPredicate(VPTState &
Block) {
241 return getDivergent(
Block) ==
nullptr;
248 for (
unsigned i = 1; i < Insts.
size(); ++i) {
257 static bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
259 if (Exclusive && Predicates.
size() != 1)
265 static bool isEntryPredicatedOnVCTP(VPTState &
Block,
266 bool Exclusive =
false) {
268 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
274 static bool hasImplicitlyValidVPT(VPTState &
Block,
279 "Expected VPT block to begin with VPT/VPST");
286 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
299 for (
auto *Def : Defs)
307 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
308 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
309 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
317 if (isEntryPredicatedOnVCTP(
Block,
false) ||
325 "Expected VPT block to start with a VPST or VPT!");
326 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
330 for (
auto *
MI : Insts) {
340 if (!isPredicatedOnVCTP(
MI)) {
354 assert(Insts.size() <= 5 &&
"Too many instructions in VPT block!");
357 bool containsVCTP()
const {
361 unsigned size()
const {
return Insts.size(); }
365 struct LowOverheadLoop {
386 bool CannotTailPredicate =
false;
393 MF =
ML.getHeader()->getParent();
394 if (
auto *
MBB =
ML.getLoopPreheader())
407 CannotTailPredicate = !ValidateMVEInst(
MI);
410 bool IsTailPredicationLegal()
const {
413 return !Revert && FoundAllComponents() && !VCTPs.
empty() &&
414 !CannotTailPredicate &&
ML.getNumBlocks() == 1;
424 bool ValidateTailPredicate();
428 bool ValidateLiveOuts();
434 bool FoundAllComponents()
const {
435 return Start && Dec &&
End;
439 return VPTState::Blocks;
445 if (IsTailPredicationLegal())
446 return TPNumElements;
447 return Start->getOperand(1);
450 unsigned getStartOpcode()
const {
452 if (!IsTailPredicationLegal())
453 return IsDo ? ARM::t2DLS : ARM::t2WLS;
459 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
460 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
461 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *
End;
462 if (!VCTPs.
empty()) {
463 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
464 for (
auto *
MI : VCTPs)
467 if (!FoundAllComponents())
468 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
469 else if (!(Start && Dec && End))
470 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
481 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
499 MachineFunctionProperties::Property::NoVRegs).
set(
500 MachineFunctionProperties::Property::TracksLiveness);
510 bool RevertNonLoops();
521 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
525 void Expand(LowOverheadLoop &LoLoop);
527 void IterationCountDCE(LowOverheadLoop &LoLoop);
531char ARMLowOverheadLoops::ID = 0;
536 std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
549 for (
auto *Dead : Killed)
550 BasicBlocks.
insert(Dead->getParent());
553 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
554 for (
auto *
MBB : BasicBlocks) {
555 for (
auto &
IT : *
MBB) {
556 if (
IT.getOpcode() != ARM::t2IT)
567 for (
auto *Dead : Killed) {
568 if (
MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
571 auto &CurrentBlock = ITBlocks[
IT];
572 CurrentBlock.erase(Dead);
573 if (CurrentBlock.empty())
579 if (!ModifiedITs.
empty())
581 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
592 <<
" - can also remove:\n";
598 if (WontCorruptITs(Killed,
RDA)) {
601 dbgs() <<
" - " << *Dead);
608bool LowOverheadLoop::ValidateTailPredicate() {
609 if (!IsTailPredicationLegal()) {
611 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
612 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
616 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
617 assert(
ML.getBlocks().size() == 1 &&
618 "Shouldn't be processing a loop with more than one block");
621 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
625 if (!VPTState::isValid(
RDA)) {
630 if (!ValidateLiveOuts()) {
640 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
641 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
642 TPNumElements = Start->getOperand(2);
643 StartInsertPt = Start;
644 StartInsertBB = Start->getParent();
653 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
661 if (StartInsertPt != StartInsertBB->
end() &&
666 ElemDef->removeFromParent();
667 StartInsertBB->
insert(StartInsertPt, ElemDef);
669 <<
"ARM Loops: Moved element count def: " << *ElemDef);
671 StartInsertPt->removeFromParent();
674 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
684 TPNumElements = Operand;
685 NumElements = TPNumElements.
getReg();
688 <<
"ARM Loops: Unable to move element count to loop "
689 <<
"start instruction.\n");
716 while (
MBB &&
MBB != StartInsertBB) {
717 if (CannotProvideElements(
MBB, NumElements)) {
718 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
735 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
746 if (InstrVecSize > VCTPVecSize) {
747 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
748 <<
"VecSize:\n" << *
MI);
774 if (TryRemove(Def,
RDA, ElementChain,
Ignore)) {
775 bool FoundSub =
false;
777 for (
auto *
MI : ElementChain) {
782 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
783 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
789 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
794 ToRemove.insert(ElementChain.begin(), ElementChain.end());
801 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
802 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
803 Preheader && !Preheader->
empty() &&
856 switch (
MI.getOpcode()) {
865 case ARM::MVE_VCLZs8:
866 case ARM::MVE_VCLZs16:
867 case ARM::MVE_VCLZs32:
881 InstSet &FalseLanesZero) {
893 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
894 Def->getOperand(1).getImm() == 0;
898 for (
auto &MO :
MI.operands()) {
916 for (
auto *Def : Defs) {
917 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
919 if (MO.
isUse() && isPredicated)
928bool LowOverheadLoop::ValidateLiveOuts() {
958 for (
auto &
MI : *Header) {
966 bool retainsOrReduces =
973 else if (
MI.getNumDefs() == 0)
975 else if (!isPredicated && retainsOrReduces) {
976 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
978 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
983 dbgs() <<
" Predicated:\n";
984 for (
auto *
I : Predicated)
986 dbgs() <<
" FalseLanesZero:\n";
987 for (
auto *
I : FalseLanesZero)
989 dbgs() <<
" FalseLanesUnknown:\n";
990 for (
auto *
I : FalseLanesUnknown)
999 if (
Use !=
MI && !Predicated.count(
Use))
1012 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
1013 for (
auto &MO :
MI->operands()) {
1016 if (!HasPredicatedUsers(
MI, MO, Predicated)) {
1018 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1025 Predicated.insert(
MI);
1030 ML.getExitBlocks(ExitBlocks);
1031 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1032 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1037 if (RegMask.PhysReg == ARM::VPR) {
1043 if (QPRs->
contains(RegMask.PhysReg))
1056 while (!Worklist.empty()) {
1058 if (
MI->getOpcode() == ARM::MQPRCopy) {
1063 Worklist.push_back(CopySrc);
1083 ?
End->getOperand(1).getMBB()
1084 :
End->getOperand(2).getMBB();
1087 if (TgtBB !=
ML.getHeader()) {
1088 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1094 if (BBUtils->getOffsetOf(
End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1095 !BBUtils->isBBInRange(
End,
ML.getHeader(), 4094)) {
1102 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1103 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1104 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1112 StartInsertBB = Start->getParent();
1116 Revert = !ValidateRanges(Start,
End, BBUtils,
ML);
1117 CannotTailPredicate = !ValidateTailPredicate();
1122 if (VCTPs.
empty()) {
1132 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1133 "definition from the main VCTP");
1145 if (
const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1146 return FS->getFrameIndex();
1153 switch (
I->getOpcode()) {
1154 case ARM::MVE_VSTRWU32:
1155 case ARM::MVE_VLDRWU32: {
1156 return I->getOperand(1).getReg() == ARM::SP &&
1157 I->memoperands().size() == 1 &&
1158 GetFrameIndex(
I->memoperands().front()) >= 0;
1167 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1173 if (
MI->memoperands().size() == 0)
1175 int FI = GetFrameIndex(
MI->memoperands().front());
1177 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1178 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1182 ML->getExitBlocks(Frontier);
1185 while (
Idx < Frontier.
size()) {
1187 bool LookAtSuccessors =
true;
1188 for (
auto &
I : *BB) {
1189 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1191 if (GetFrameIndex(
I.memoperands().front()) != FI)
1195 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1196 LookAtSuccessors =
false;
1201 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1205 if (LookAtSuccessors) {
1219 if (CannotTailPredicate)
1225 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1226 MI->getOpcode() == ARM::MVE_VPNOT) {
1246 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1253 VPTState::addInst(
MI);
1255 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1264 bool RequiresExplicitPredication =
1267 if (
MI->getOpcode() == ARM::MQPRCopy)
1270 DoubleWidthResultInstrs.insert(
MI);
1275 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1290 VPTState::resetPredicate(
MI);
1292 VPTState::addPredicate(
MI);
1298 VPTState::CreateVPTBlock(
MI);
1311 MLI = &getAnalysis<MachineLoopInfo>();
1312 RDA = &getAnalysis<ReachingDefAnalysis>();
1313 MF->
getProperties().
set(MachineFunctionProperties::Property::TracksLiveness);
1316 TRI =
ST.getRegisterInfo();
1318 BBUtils->computeAllBlockSizes();
1319 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1321 bool Changed =
false;
1322 for (
auto *
ML : *MLI) {
1323 if (
ML->isOutermost())
1324 Changed |= ProcessLoop(
ML);
1326 Changed |= RevertNonLoops();
1332 bool Changed =
false;
1336 Changed |= ProcessLoop(L);
1339 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1340 if (
auto *Preheader =
ML->getLoopPreheader())
1342 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1344 for (
auto *
MBB :
ML->getBlocks())
1352 for (
auto &
MI : *
MBB) {
1361 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDA, *
TRI, *
TII);
1365 if (LoLoop.Preheader)
1366 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1374 for (
auto &
MI : *
MBB) {
1375 if (
MI.isDebugValue())
1377 else if (
MI.getOpcode() == ARM::t2LoopDec)
1379 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1381 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1382 LoLoop.End = LoLoop.Dec = &
MI;
1385 else if (
MI.getDesc().isCall()) {
1389 LoLoop.Revert =
true;
1394 LoLoop.AnalyseMVEInst(&
MI);
1400 if (!LoLoop.FoundAllComponents()) {
1401 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1405 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1406 "Expected t2WhileLoopStart to be removed before regalloc!");
1411 if (LoLoop.Dec != LoLoop.End) {
1414 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1416 LoLoop.Revert =
true;
1419 LoLoop.Validate(BBUtils.get());
1431 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1432 ARM::tBcc : ARM::t2Bcc;
1447 if (
I->getOpcode() == ARM::t2LoopEnd) {
1462void ARMLowOverheadLoops::RevertLoopEnd(
MachineInstr *
MI,
bool SkipCmp)
const {
1466 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1467 ARM::tBcc : ARM::t2Bcc;
1473void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1475 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1481 MIB.
add(
MI->getOperand(1));
1484 MIB.
addReg(ARM::NoRegister);
1490 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1494 MIB.
add(
MI->getOperand(2));
1498 MI->eraseFromParent();
1525void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1526 if (!LoLoop.IsTailPredicationLegal())
1529 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1533 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1540 if (!TryRemove(Def, *
RDA, LoLoop.ToRemove, Killed))
1541 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1544MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1548 IterationCountDCE(LoLoop);
1553 unsigned Opc = LoLoop.getStartOpcode();
1559 Count.
getReg() == ARM::LR) {
1560 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1564 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1575 LoLoop.ToRemove.insert(Start);
1579void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1581 if (
MI->isDebugInstr())
1585 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1587 "Expected Then predicate!");
1589 MI->getOperand(PIdx + 1).setReg(0);
1592 for (
auto &
Block : LoLoop.getVPTBlocks()) {
1596 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1599 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1608 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1609 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1610 LoLoop.ToRemove.insert(TheVCMP);
1614 if (VPTState::isEntryPredicatedOnVCTP(
Block,
true)) {
1616 if (VPTState::hasUniformPredicate(
Block)) {
1622 for (
unsigned i = 1; i < Insts.
size(); ++i)
1623 RemovePredicate(Insts[i]);
1635 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1638 bool DivergentNextIsPredicated =
1639 DivergentNext !=
MBB->
end() &&
1644 RemovePredicate(&*
I);
1651 if (DivergentNextIsPredicated) {
1663 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1667 ReplaceVCMPWithVPT(VCMP, VCMP);
1672 LoLoop.ToRemove.insert(VPST);
1673 }
else if (
Block.containsVCTP()) {
1677 if (
Block.size() == 2) {
1679 "Found a VPST in an otherwise empty vpt block");
1680 LoLoop.ToRemove.insert(VPST);
1682 LoLoop.BlockMasksToRecompute.insert(VPST);
1683 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1690 "The instruction after a VPST must be predicated");
1694 !LoLoop.ToRemove.contains(VprDef)) {
1705 ReplaceVCMPWithVPT(VCMP, VPST);
1707 LoLoop.ToRemove.insert(VPST);
1713 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1716void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1719 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1722 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1723 ARM::MVE_LETP : ARM::t2LEUpdate;
1727 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1728 MIB.
add(
End->getOperand(Off + 0));
1729 MIB.
add(
End->getOperand(Off + 1));
1731 LoLoop.ToRemove.insert(LoLoop.Dec);
1732 LoLoop.ToRemove.insert(
End);
1747 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1756 for (
auto *
MI : VMOVCopies) {
1758 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1763 ARM::D0 + (Dst - ARM::Q0) * 2)
1764 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1769 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1770 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1774 MI->eraseFromParent();
1778 if (LoLoop.Revert) {
1780 RevertWhile(LoLoop.Start);
1782 RevertDo(LoLoop.Start);
1783 if (LoLoop.Dec == LoLoop.End)
1784 RevertLoopEndDec(LoLoop.End);
1788 ExpandVMOVCopies(LoLoop.VMOVCopies);
1789 LoLoop.Start = ExpandLoopStart(LoLoop);
1791 RemoveDeadBranch(LoLoop.Start);
1792 LoLoop.End = ExpandLoopEnd(LoLoop);
1793 RemoveDeadBranch(LoLoop.End);
1794 if (LoLoop.IsTailPredicationLegal())
1795 ConvertVPTBlocks(LoLoop);
1796 for (
auto *
I : LoLoop.ToRemove) {
1798 I->eraseFromParent();
1800 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1801 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1807 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1810 for (
auto *
MBB : PostOrder) {
1824bool ARMLowOverheadLoops::RevertNonLoops() {
1825 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1826 bool Changed =
false;
1828 for (
auto &
MBB : *MF) {
1835 for (
auto &
I :
MBB) {
1838 else if (
I.getOpcode() == ARM::t2LoopDec)
1840 else if (
I.getOpcode() == ARM::t2LoopEnd)
1842 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1844 else if (
I.getOpcode() == ARM::MQPRCopy)
1854 for (
auto *Start : Starts) {
1860 for (
auto *Dec : Decs)
1863 for (
auto *
End : Ends)
1865 for (
auto *
End : EndDecs)
1866 RevertLoopEndDec(
End);
1867 for (
auto *
MI : MQPRCopies) {
1869 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1872 MI->getOperand(0).getReg())
1873 .
add(
MI->getOperand(1))
1874 .
add(
MI->getOperand(1));
1876 MI->eraseFromParent();
1883 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
static bool isDomainMVE(MachineInstr *MI)
SmallPtrSet< MachineInstr *, 2 > Uses
static bool isVectorPredicated(MachineInstr *MI)
ReachingDefAnalysis & RDA
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
ReachingDefAnalysis InstSet & ToRemove
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefAnalysis InstSet InstSet & Ignore
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of the register that MI defines.
void reset()
Re-run the analysis.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
void clear()
Completely clear the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Pair of physical register and lane mask.