75#define DEBUG_TYPE "arm-low-overhead-loops"
76#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
80 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
85 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
90 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
94 return MI->findRegisterDefOperandIdx(ARM::VPR,
nullptr) != -1;
98 return MI.findRegisterUseOperandIdx(ARM::VPR,
nullptr) != -1;
113 if (
MI.isDebugInstr())
128 class PostOrderLoopTraversal {
136 :
ML(
ML), MLI(MLI) { }
152 if (!
ML.contains(Succ))
161 ML.getExitBlocks(ExitBlocks);
165 Search(
ML.getHeader());
175 if (
auto *Preheader =
ML.getLoopPreheader())
176 GetPredecessor(Preheader);
178 GetPredecessor(Preheader);
190 bool hasUniformPredicate() {
return getDivergent() ==
nullptr; }
196 for (
unsigned i = 1; i < Insts.
size(); ++i) {
207 assert(Insts.
size() <= 5 &&
"Too many instructions in VPT block!");
212 unsigned size()
const {
return Insts.
size(); }
222 friend struct LowOverheadLoop;
226 std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;
229 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
230 &&
"Can't begin VPT without predicate");
235 PredicatedInsts[
MI] = CurrentPredicates;
240 PredicatedInsts[
MI] = CurrentPredicates;
250 CurrentPredicates.
clear();
256 bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
258 if (Exclusive && Predicates.
size() != 1)
267 bool isEntryPredicatedOnVCTP(VPTBlock &
Block,
bool Exclusive =
false) {
269 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
279 "Expected VPT block to begin with VPT/VPST");
289 return !MI->mayStore() && !MI->mayLoad() &&
290 !isHorizontalReduction(*MI) && !isVCTP(MI);
296 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
309 for (
auto *Def : Defs)
317 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
318 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
319 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
327 if (isEntryPredicatedOnVCTP(
Block,
false) &&
329 return getVPTInstrPredicate(*MI) == ARMVCC::Else;
332 if (hasImplicitlyValidVPT(
Block,
RDA))
339 "Expected VPT block to start with a VPST or VPT!");
340 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
344 for (
auto *
MI : Insts) {
354 if (!isPredicatedOnVCTP(
MI)) {
364 struct LowOverheadLoop {
385 bool CannotTailPredicate =
false;
393 MF =
ML.getHeader()->getParent();
394 if (
auto *
MBB =
ML.getLoopPreheader())
406 CannotTailPredicate = !ValidateMVEInst(
MI);
409 bool IsTailPredicationLegal()
const {
412 return !Revert && FoundAllComponents() && !VCTPs.
empty() &&
413 !CannotTailPredicate &&
ML.getNumBlocks() == 1;
423 bool ValidateTailPredicate();
427 bool ValidateLiveOuts();
433 bool FoundAllComponents()
const {
434 return Start && Dec &&
End;
442 if (IsTailPredicationLegal())
443 return TPNumElements;
444 return Start->getOperand(1);
447 unsigned getStartOpcode()
const {
449 if (!IsTailPredicationLegal())
450 return IsDo ? ARM::t2DLS : ARM::t2WLS;
456 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
457 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
458 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *
End;
459 if (!VCTPs.
empty()) {
460 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
461 for (
auto *
MI : VCTPs)
464 if (!FoundAllComponents())
465 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
466 else if (!(Start && Dec && End))
467 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
478 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
496 MachineFunctionProperties::Property::NoVRegs).
set(
497 MachineFunctionProperties::Property::TracksLiveness);
507 bool RevertNonLoops();
518 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
522 void Expand(LowOverheadLoop &LoLoop);
524 void IterationCountDCE(LowOverheadLoop &LoLoop);
528char ARMLowOverheadLoops::ID = 0;
541 for (
auto *Dead : Killed)
542 BasicBlocks.
insert(Dead->getParent());
545 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
546 for (
auto *
MBB : BasicBlocks) {
547 for (
auto &
IT : *
MBB) {
548 if (
IT.getOpcode() != ARM::t2IT)
559 for (
auto *Dead : Killed) {
561 Dead->findRegisterUseOperand(ARM::ITSTATE,
nullptr)) {
564 auto &CurrentBlock = ITBlocks[
IT];
565 CurrentBlock.erase(Dead);
566 if (CurrentBlock.empty())
572 if (!ModifiedITs.
empty())
574 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
585 <<
" - can also remove:\n";
591 if (WontCorruptITs(Killed,
RDA)) {
594 dbgs() <<
" - " << *Dead);
601bool LowOverheadLoop::ValidateTailPredicate() {
602 if (!IsTailPredicationLegal()) {
604 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
605 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
609 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
610 assert(
ML.getBlocks().size() == 1 &&
611 "Shouldn't be processing a loop with more than one block");
614 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
618 if (!VPTstate.isValid(
RDA)) {
623 if (!ValidateLiveOuts()) {
633 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
634 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
635 TPNumElements = Start->getOperand(2);
636 StartInsertPt = Start;
637 StartInsertBB = Start->getParent();
646 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
654 if (StartInsertPt != StartInsertBB->
end() &&
659 ElemDef->removeFromParent();
660 StartInsertBB->
insert(StartInsertPt, ElemDef);
662 <<
"ARM Loops: Moved element count def: " << *ElemDef);
664 StartInsertPt->removeFromParent();
667 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
677 TPNumElements = Operand;
678 NumElements = TPNumElements.
getReg();
681 <<
"ARM Loops: Unable to move element count to loop "
682 <<
"start instruction.\n");
709 while (
MBB &&
MBB != StartInsertBB) {
710 if (CannotProvideElements(
MBB, NumElements)) {
711 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
728 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
739 if (InstrVecSize > VCTPVecSize) {
740 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
741 <<
"VecSize:\n" << *
MI);
767 if (TryRemove(Def,
RDA, ElementChain,
Ignore)) {
768 bool FoundSub =
false;
770 for (
auto *
MI : ElementChain) {
775 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
776 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
782 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
787 ToRemove.insert(ElementChain.begin(), ElementChain.end());
794 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
795 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
796 Preheader && !Preheader->
empty() &&
843 switch (
MI.getOpcode()) {
852 case ARM::MVE_VCLZs8:
853 case ARM::MVE_VCLZs16:
854 case ARM::MVE_VCLZs32:
868 InstSet &FalseLanesZero) {
880 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
881 Def->getOperand(1).getImm() == 0;
885 for (
auto &MO :
MI.operands()) {
903 for (
auto *Def : Defs) {
904 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
906 if (MO.
isUse() && isPredicated)
915bool LowOverheadLoop::ValidateLiveOuts() {
945 for (
auto &
MI : *Header) {
953 bool retainsOrReduces =
960 else if (
MI.getNumDefs() == 0)
962 else if (!isPredicated && retainsOrReduces) {
963 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
965 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
970 dbgs() <<
" Predicated:\n";
973 dbgs() <<
" FalseLanesZero:\n";
974 for (
auto *
I : FalseLanesZero)
976 dbgs() <<
" FalseLanesUnknown:\n";
977 for (
auto *
I : FalseLanesUnknown)
999 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
1000 for (
auto &MO :
MI->operands()) {
1005 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1017 ML.getExitBlocks(ExitBlocks);
1018 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1019 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1024 if (RegMask.PhysReg == ARM::VPR) {
1030 if (QPRs->
contains(RegMask.PhysReg))
1043 while (!Worklist.empty()) {
1045 if (
MI->getOpcode() == ARM::MQPRCopy) {
1050 Worklist.push_back(CopySrc);
1070 ?
End->getOperand(1).getMBB()
1071 :
End->getOperand(2).getMBB();
1074 if (TgtBB !=
ML.getHeader()) {
1075 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1081 if (BBUtils->getOffsetOf(
End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1082 !BBUtils->isBBInRange(
End,
ML.getHeader(), 4094)) {
1089 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1090 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1091 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1099 StartInsertBB = Start->getParent();
1103 Revert = !ValidateRanges(Start,
End, BBUtils,
ML);
1104 CannotTailPredicate = !ValidateTailPredicate();
1109 if (VCTPs.
empty()) {
1119 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1120 "definition from the main VCTP");
1132 if (
const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1133 return FS->getFrameIndex();
1140 switch (
I->getOpcode()) {
1141 case ARM::MVE_VSTRWU32:
1142 case ARM::MVE_VLDRWU32: {
1143 return I->getOperand(1).getReg() == ARM::SP &&
1144 I->memoperands().size() == 1 &&
1145 GetFrameIndex(
I->memoperands().front()) >= 0;
1154 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1160 if (
MI->memoperands().size() == 0)
1162 int FI = GetFrameIndex(
MI->memoperands().front());
1164 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1165 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1169 ML->getExitBlocks(Frontier);
1172 while (
Idx < Frontier.
size()) {
1174 bool LookAtSuccessors =
true;
1175 for (
auto &
I : *BB) {
1176 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1178 if (GetFrameIndex(
I.memoperands().front()) != FI)
1182 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1183 LookAtSuccessors =
false;
1188 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1192 if (LookAtSuccessors) {
1206 if (CannotTailPredicate)
1212 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1213 MI->getOpcode() == ARM::MVE_VPNOT) {
1233 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1240 VPTstate.addInst(
MI);
1242 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1251 bool RequiresExplicitPredication =
1254 if (
MI->getOpcode() == ARM::MQPRCopy)
1257 DoubleWidthResultInstrs.insert(
MI);
1262 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1277 VPTstate.resetPredicate(
MI);
1279 VPTstate.addPredicate(
MI);
1285 VPTstate.CreateVPTBlock(
MI);
1298 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1299 RDA = &getAnalysis<ReachingDefAnalysis>();
1300 MF->
getProperties().
set(MachineFunctionProperties::Property::TracksLiveness);
1303 TRI =
ST.getRegisterInfo();
1304 BBUtils = std::make_unique<ARMBasicBlockUtils>(*MF);
1305 BBUtils->computeAllBlockSizes();
1306 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1308 bool Changed =
false;
1309 for (
auto *
ML : *MLI) {
1310 if (
ML->isOutermost())
1311 Changed |= ProcessLoop(
ML);
1313 Changed |= RevertNonLoops();
1318 bool Changed =
false;
1322 Changed |= ProcessLoop(L);
1325 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1326 if (
auto *Preheader =
ML->getLoopPreheader())
1328 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1330 for (
auto *
MBB :
ML->getBlocks())
1338 for (
auto &
MI : *
MBB) {
1347 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDA, *
TRI, *
TII);
1351 if (LoLoop.Preheader)
1352 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1360 for (
auto &
MI : *
MBB) {
1361 if (
MI.isDebugValue())
1363 else if (
MI.getOpcode() == ARM::t2LoopDec)
1365 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1367 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1368 LoLoop.End = LoLoop.Dec = &
MI;
1371 else if (
MI.getDesc().isCall()) {
1375 LoLoop.Revert =
true;
1380 LoLoop.AnalyseMVEInst(&
MI);
1386 if (!LoLoop.FoundAllComponents()) {
1387 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1391 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1392 "Expected t2WhileLoopStart to be removed before regalloc!");
1397 if (LoLoop.Dec != LoLoop.End) {
1400 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1402 LoLoop.Revert =
true;
1405 LoLoop.Validate(BBUtils.get());
1417 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1418 ARM::tBcc : ARM::t2Bcc;
1433 if (
I->getOpcode() == ARM::t2LoopEnd) {
1448void ARMLowOverheadLoops::RevertLoopEnd(
MachineInstr *
MI,
bool SkipCmp)
const {
1452 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1453 ARM::tBcc : ARM::t2Bcc;
1459void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1461 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1467 MIB.
add(
MI->getOperand(1));
1470 MIB.
addReg(ARM::NoRegister);
1476 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1480 MIB.
add(
MI->getOperand(2));
1484 MI->eraseFromParent();
1511void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1512 if (!LoLoop.IsTailPredicationLegal())
1515 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1519 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1526 if (!TryRemove(Def, *
RDA, LoLoop.ToRemove, Killed))
1527 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1530MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1534 IterationCountDCE(LoLoop);
1539 unsigned Opc = LoLoop.getStartOpcode();
1545 Count.
getReg() == ARM::LR) {
1546 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1550 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1561 LoLoop.ToRemove.insert(Start);
1565void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1567 if (
MI->isDebugInstr())
1571 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1573 "Expected Then predicate!");
1575 MI->getOperand(PIdx + 1).setReg(0);
1578 for (
auto &
Block : LoLoop.getVPTBlocks()) {
1582 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1585 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1594 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1595 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1596 LoLoop.ToRemove.insert(TheVCMP);
1600 if (LoLoop.VPTstate.isEntryPredicatedOnVCTP(
Block,
true)) {
1602 if (
Block.hasUniformPredicate()) {
1608 for (
unsigned i = 1; i < Insts.
size(); ++i)
1609 RemovePredicate(Insts[i]);
1621 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1624 bool DivergentNextIsPredicated =
1625 DivergentNext !=
MBB->
end() &&
1630 RemovePredicate(&*
I);
1637 if (DivergentNextIsPredicated) {
1649 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1653 ReplaceVCMPWithVPT(VCMP, VCMP);
1658 LoLoop.ToRemove.insert(VPST);
1659 }
else if (
Block.containsVCTP()) {
1663 if (
Block.size() == 2) {
1665 "Found a VPST in an otherwise empty vpt block");
1666 LoLoop.ToRemove.insert(VPST);
1668 LoLoop.BlockMasksToRecompute.insert(VPST);
1669 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1676 "The instruction after a VPST must be predicated");
1680 !LoLoop.ToRemove.contains(VprDef)) {
1691 ReplaceVCMPWithVPT(VCMP, VPST);
1693 LoLoop.ToRemove.insert(VPST);
1699 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1702void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1705 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1708 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1709 ARM::MVE_LETP : ARM::t2LEUpdate;
1713 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1714 MIB.
add(
End->getOperand(Off + 0));
1715 MIB.
add(
End->getOperand(Off + 1));
1717 LoLoop.ToRemove.insert(LoLoop.Dec);
1718 LoLoop.ToRemove.insert(
End);
1733 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1742 for (
auto *
MI : VMOVCopies) {
1744 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1749 ARM::D0 + (Dst - ARM::Q0) * 2)
1750 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1755 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1756 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1760 MI->eraseFromParent();
1764 if (LoLoop.Revert) {
1766 RevertWhile(LoLoop.Start);
1768 RevertDo(LoLoop.Start);
1769 if (LoLoop.Dec == LoLoop.End)
1770 RevertLoopEndDec(LoLoop.End);
1774 ExpandVMOVCopies(LoLoop.VMOVCopies);
1775 LoLoop.Start = ExpandLoopStart(LoLoop);
1777 RemoveDeadBranch(LoLoop.Start);
1778 LoLoop.End = ExpandLoopEnd(LoLoop);
1779 RemoveDeadBranch(LoLoop.End);
1780 if (LoLoop.IsTailPredicationLegal())
1781 ConvertVPTBlocks(LoLoop);
1782 for (
auto *
I : LoLoop.ToRemove) {
1784 I->eraseFromParent();
1786 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1787 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1793 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1805bool ARMLowOverheadLoops::RevertNonLoops() {
1806 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1807 bool Changed =
false;
1809 for (
auto &
MBB : *MF) {
1816 for (
auto &
I :
MBB) {
1819 else if (
I.getOpcode() == ARM::t2LoopDec)
1821 else if (
I.getOpcode() == ARM::t2LoopEnd)
1823 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1825 else if (
I.getOpcode() == ARM::MQPRCopy)
1835 for (
auto *Start : Starts) {
1841 for (
auto *Dec : Decs)
1844 for (
auto *
End : Ends)
1846 for (
auto *
End : EndDecs)
1847 RevertLoopEndDec(
End);
1848 for (
auto *
MI : MQPRCopies) {
1850 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1853 MI->getOperand(0).getReg())
1854 .
add(
MI->getOperand(1))
1855 .
add(
MI->getOperand(1));
1857 MI->eraseFromParent();
1864 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
static bool isDomainMVE(MachineInstr *MI)
SmallPtrSet< MachineInstr *, 2 > Uses
static bool isVectorPredicated(MachineInstr *MI)
ReachingDefAnalysis & RDA
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
ReachingDefAnalysis InstSet & ToRemove
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefAnalysis InstSet InstSet & Ignore
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
void reset()
Re-run the analysis.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Pair of physical register and lane mask.