73#define DEBUG_TYPE "arm-low-overhead-loops"
74#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
78 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
83 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
88 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
92 return MI->findRegisterDefOperandIdx(ARM::VPR,
nullptr) != -1;
96 return MI.findRegisterUseOperandIdx(ARM::VPR,
nullptr) != -1;
111 if (
MI.isDebugInstr())
126 class PostOrderLoopTraversal {
134 :
ML(
ML), MLI(MLI) { }
149 if (!
ML.contains(Succ))
158 ML.getExitBlocks(ExitBlocks);
162 Search(
ML.getHeader());
172 if (
auto *Preheader =
ML.getLoopPreheader())
173 GetPredecessor(Preheader);
175 GetPredecessor(Preheader);
187 bool hasUniformPredicate() {
return getDivergent() ==
nullptr; }
193 for (
unsigned i = 1; i < Insts.
size(); ++i) {
204 assert(Insts.
size() <= 5 &&
"Too many instructions in VPT block!");
209 unsigned size()
const {
return Insts.
size(); }
219 friend struct LowOverheadLoop;
223 std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;
226 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
227 &&
"Can't begin VPT without predicate");
232 PredicatedInsts[
MI] = CurrentPredicates;
237 PredicatedInsts[
MI] = CurrentPredicates;
247 CurrentPredicates.
clear();
253 bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
255 if (Exclusive && Predicates.
size() != 1)
264 bool isEntryPredicatedOnVCTP(VPTBlock &
Block,
bool Exclusive =
false) {
266 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
276 "Expected VPT block to begin with VPT/VPST");
286 return !MI->mayStore() && !MI->mayLoad() &&
287 !isHorizontalReduction(*MI) && !isVCTP(MI);
293 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
306 for (
auto *Def : Defs)
314 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
315 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
316 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
324 if (isEntryPredicatedOnVCTP(
Block,
false) &&
326 return getVPTInstrPredicate(*MI) == ARMVCC::Else;
329 if (hasImplicitlyValidVPT(
Block,
RDA))
336 "Expected VPT block to start with a VPST or VPT!");
337 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
341 for (
auto *
MI : Insts) {
351 if (!isPredicatedOnVCTP(
MI)) {
361 struct LowOverheadLoop {
382 bool CannotTailPredicate =
false;
390 MF =
ML.getHeader()->getParent();
391 if (
auto *
MBB =
ML.getLoopPreheader())
403 CannotTailPredicate = !ValidateMVEInst(
MI);
406 bool IsTailPredicationLegal()
const {
409 return !Revert && FoundAllComponents() && !VCTPs.
empty() &&
410 !CannotTailPredicate &&
ML.getNumBlocks() == 1;
420 bool ValidateTailPredicate();
424 bool ValidateLiveOuts();
430 bool FoundAllComponents()
const {
431 return Start && Dec &&
End;
439 if (IsTailPredicationLegal())
440 return TPNumElements;
441 return Start->getOperand(1);
444 unsigned getStartOpcode()
const {
446 if (!IsTailPredicationLegal())
447 return IsDo ? ARM::t2DLS : ARM::t2WLS;
453 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
454 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
455 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *
End;
456 if (!VCTPs.
empty()) {
457 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
458 for (
auto *
MI : VCTPs)
461 if (!FoundAllComponents())
462 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
463 else if (!(Start && Dec && End))
464 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
475 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
493 MachineFunctionProperties::Property::NoVRegs).
set(
494 MachineFunctionProperties::Property::TracksLiveness);
504 bool RevertNonLoops();
515 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
519 void Expand(LowOverheadLoop &LoLoop);
521 void IterationCountDCE(LowOverheadLoop &LoLoop);
525char ARMLowOverheadLoops::ID = 0;
538 for (
auto *Dead : Killed)
539 BasicBlocks.
insert(Dead->getParent());
542 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
543 for (
auto *
MBB : BasicBlocks) {
544 for (
auto &
IT : *
MBB) {
545 if (
IT.getOpcode() != ARM::t2IT)
556 for (
auto *Dead : Killed) {
558 Dead->findRegisterUseOperand(ARM::ITSTATE,
nullptr)) {
561 auto &CurrentBlock = ITBlocks[
IT];
562 CurrentBlock.erase(Dead);
563 if (CurrentBlock.empty())
569 if (!ModifiedITs.
empty())
571 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
582 <<
" - can also remove:\n";
588 if (WontCorruptITs(Killed,
RDA)) {
591 dbgs() <<
" - " << *Dead);
598bool LowOverheadLoop::ValidateTailPredicate() {
599 if (!IsTailPredicationLegal()) {
601 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
602 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
606 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
607 assert(
ML.getBlocks().size() == 1 &&
608 "Shouldn't be processing a loop with more than one block");
611 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
615 if (!VPTstate.isValid(
RDA)) {
620 if (!ValidateLiveOuts()) {
630 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
631 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
632 TPNumElements = Start->getOperand(2);
633 StartInsertPt = Start;
634 StartInsertBB = Start->getParent();
643 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
651 if (StartInsertPt != StartInsertBB->
end() &&
656 ElemDef->removeFromParent();
657 StartInsertBB->
insert(StartInsertPt, ElemDef);
659 <<
"ARM Loops: Moved element count def: " << *ElemDef);
661 StartInsertPt->removeFromParent();
664 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
674 TPNumElements = Operand;
675 NumElements = TPNumElements.
getReg();
678 <<
"ARM Loops: Unable to move element count to loop "
679 <<
"start instruction.\n");
706 while (
MBB &&
MBB != StartInsertBB) {
707 if (CannotProvideElements(
MBB, NumElements)) {
708 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
725 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
736 if (InstrVecSize > VCTPVecSize) {
737 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
738 <<
"VecSize:\n" << *
MI);
764 if (TryRemove(Def,
RDA, ElementChain,
Ignore)) {
765 bool FoundSub =
false;
767 for (
auto *
MI : ElementChain) {
772 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
773 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
779 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
784 ToRemove.insert(ElementChain.begin(), ElementChain.end());
791 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
792 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
793 Preheader && !Preheader->
empty() &&
840 switch (
MI.getOpcode()) {
849 case ARM::MVE_VCLZs8:
850 case ARM::MVE_VCLZs16:
851 case ARM::MVE_VCLZs32:
865 InstSet &FalseLanesZero) {
877 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
878 Def->getOperand(1).getImm() == 0;
882 for (
auto &MO :
MI.operands()) {
900 for (
auto *Def : Defs) {
901 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
903 if (MO.
isUse() && isPredicated)
912bool LowOverheadLoop::ValidateLiveOuts() {
942 for (
auto &
MI : *Header) {
950 bool retainsOrReduces =
957 else if (
MI.getNumDefs() == 0)
959 else if (!isPredicated && retainsOrReduces) {
960 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
962 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
967 dbgs() <<
" Predicated:\n";
970 dbgs() <<
" FalseLanesZero:\n";
971 for (
auto *
I : FalseLanesZero)
973 dbgs() <<
" FalseLanesUnknown:\n";
974 for (
auto *
I : FalseLanesUnknown)
996 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
997 for (
auto &MO :
MI->operands()) {
1002 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1014 ML.getExitBlocks(ExitBlocks);
1015 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1016 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1021 if (RegMask.PhysReg == ARM::VPR) {
1027 if (QPRs->
contains(RegMask.PhysReg))
1040 while (!Worklist.empty()) {
1042 if (
MI->getOpcode() == ARM::MQPRCopy) {
1047 Worklist.push_back(CopySrc);
1067 ?
End->getOperand(1).getMBB()
1068 :
End->getOperand(2).getMBB();
1071 if (TgtBB !=
ML.getHeader()) {
1072 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1078 if (BBUtils->getOffsetOf(
End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1079 !BBUtils->isBBInRange(
End,
ML.getHeader(), 4094)) {
1086 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1087 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1088 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1096 StartInsertBB = Start->getParent();
1100 Revert = !ValidateRanges(Start,
End, BBUtils,
ML);
1101 CannotTailPredicate = !ValidateTailPredicate();
1106 if (VCTPs.
empty()) {
1116 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1117 "definition from the main VCTP");
1129 if (
const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1130 return FS->getFrameIndex();
1137 switch (
I->getOpcode()) {
1138 case ARM::MVE_VSTRWU32:
1139 case ARM::MVE_VLDRWU32: {
1140 return I->getOperand(1).getReg() == ARM::SP &&
1141 I->memoperands().size() == 1 &&
1142 GetFrameIndex(
I->memoperands().front()) >= 0;
1151 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1157 if (
MI->memoperands().size() == 0)
1159 int FI = GetFrameIndex(
MI->memoperands().front());
1161 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1162 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1166 ML->getExitBlocks(Frontier);
1169 while (
Idx < Frontier.
size()) {
1171 bool LookAtSuccessors =
true;
1172 for (
auto &
I : *BB) {
1173 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1175 if (GetFrameIndex(
I.memoperands().front()) != FI)
1179 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1180 LookAtSuccessors =
false;
1185 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1189 if (LookAtSuccessors) {
1203 if (CannotTailPredicate)
1209 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1210 MI->getOpcode() == ARM::MVE_VPNOT) {
1230 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1237 VPTstate.addInst(
MI);
1239 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1248 bool RequiresExplicitPredication =
1251 if (
MI->getOpcode() == ARM::MQPRCopy)
1254 DoubleWidthResultInstrs.insert(
MI);
1259 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1274 VPTstate.resetPredicate(
MI);
1276 VPTstate.addPredicate(
MI);
1282 VPTstate.CreateVPTBlock(
MI);
1295 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1296 RDA = &getAnalysis<ReachingDefAnalysis>();
1297 MF->
getProperties().
set(MachineFunctionProperties::Property::TracksLiveness);
1300 TRI =
ST.getRegisterInfo();
1301 BBUtils = std::make_unique<ARMBasicBlockUtils>(*MF);
1302 BBUtils->computeAllBlockSizes();
1303 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1305 bool Changed =
false;
1306 for (
auto *
ML : *MLI) {
1307 if (
ML->isOutermost())
1308 Changed |= ProcessLoop(
ML);
1310 Changed |= RevertNonLoops();
1315 bool Changed =
false;
1319 Changed |= ProcessLoop(L);
1322 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1323 if (
auto *Preheader =
ML->getLoopPreheader())
1325 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1327 for (
auto *
MBB :
ML->getBlocks())
1335 for (
auto &
MI : *
MBB) {
1344 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDA, *
TRI, *
TII);
1348 if (LoLoop.Preheader)
1349 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1357 for (
auto &
MI : *
MBB) {
1358 if (
MI.isDebugValue())
1360 else if (
MI.getOpcode() == ARM::t2LoopDec)
1362 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1364 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1365 LoLoop.End = LoLoop.Dec = &
MI;
1368 else if (
MI.getDesc().isCall()) {
1372 LoLoop.Revert =
true;
1377 LoLoop.AnalyseMVEInst(&
MI);
1383 if (!LoLoop.FoundAllComponents()) {
1384 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1388 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1389 "Expected t2WhileLoopStart to be removed before regalloc!");
1394 if (LoLoop.Dec != LoLoop.End) {
1397 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1399 LoLoop.Revert =
true;
1402 LoLoop.Validate(BBUtils.get());
1414 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1415 ARM::tBcc : ARM::t2Bcc;
1430 if (
I->getOpcode() == ARM::t2LoopEnd) {
1445void ARMLowOverheadLoops::RevertLoopEnd(
MachineInstr *
MI,
bool SkipCmp)
const {
1449 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1450 ARM::tBcc : ARM::t2Bcc;
1456void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1458 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1464 MIB.
add(
MI->getOperand(1));
1467 MIB.
addReg(ARM::NoRegister);
1473 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1477 MIB.
add(
MI->getOperand(2));
1481 MI->eraseFromParent();
1508void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1509 if (!LoLoop.IsTailPredicationLegal())
1512 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1516 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1523 if (!TryRemove(Def, *
RDA, LoLoop.ToRemove, Killed))
1524 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1527MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1531 IterationCountDCE(LoLoop);
1536 unsigned Opc = LoLoop.getStartOpcode();
1542 Count.
getReg() == ARM::LR) {
1543 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1547 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1558 LoLoop.ToRemove.insert(Start);
1562void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1564 if (
MI->isDebugInstr())
1568 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1570 "Expected Then predicate!");
1572 MI->getOperand(PIdx + 1).setReg(0);
1575 for (
auto &
Block : LoLoop.getVPTBlocks()) {
1579 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1582 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1591 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1592 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1593 LoLoop.ToRemove.insert(TheVCMP);
1597 if (LoLoop.VPTstate.isEntryPredicatedOnVCTP(
Block,
true)) {
1599 if (
Block.hasUniformPredicate()) {
1605 for (
unsigned i = 1; i < Insts.
size(); ++i)
1606 RemovePredicate(Insts[i]);
1618 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1621 bool DivergentNextIsPredicated =
1622 DivergentNext !=
MBB->
end() &&
1627 RemovePredicate(&*
I);
1634 if (DivergentNextIsPredicated) {
1646 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1650 ReplaceVCMPWithVPT(VCMP, VCMP);
1655 LoLoop.ToRemove.insert(VPST);
1656 }
else if (
Block.containsVCTP()) {
1660 if (
Block.size() == 2) {
1662 "Found a VPST in an otherwise empty vpt block");
1663 LoLoop.ToRemove.insert(VPST);
1665 LoLoop.BlockMasksToRecompute.insert(VPST);
1666 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1673 "The instruction after a VPST must be predicated");
1677 !LoLoop.ToRemove.contains(VprDef)) {
1688 ReplaceVCMPWithVPT(VCMP, VPST);
1690 LoLoop.ToRemove.insert(VPST);
1696 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1699void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1702 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1705 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1706 ARM::MVE_LETP : ARM::t2LEUpdate;
1710 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1711 MIB.
add(
End->getOperand(Off + 0));
1712 MIB.
add(
End->getOperand(Off + 1));
1714 LoLoop.ToRemove.insert(LoLoop.Dec);
1715 LoLoop.ToRemove.insert(
End);
1730 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1739 for (
auto *
MI : VMOVCopies) {
1741 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1746 ARM::D0 + (Dst - ARM::Q0) * 2)
1747 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1752 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1753 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1757 MI->eraseFromParent();
1761 if (LoLoop.Revert) {
1763 RevertWhile(LoLoop.Start);
1765 RevertDo(LoLoop.Start);
1766 if (LoLoop.Dec == LoLoop.End)
1767 RevertLoopEndDec(LoLoop.End);
1771 ExpandVMOVCopies(LoLoop.VMOVCopies);
1772 LoLoop.Start = ExpandLoopStart(LoLoop);
1774 RemoveDeadBranch(LoLoop.Start);
1775 LoLoop.End = ExpandLoopEnd(LoLoop);
1776 RemoveDeadBranch(LoLoop.End);
1777 if (LoLoop.IsTailPredicationLegal())
1778 ConvertVPTBlocks(LoLoop);
1779 for (
auto *
I : LoLoop.ToRemove) {
1781 I->eraseFromParent();
1783 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1784 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1790 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1802bool ARMLowOverheadLoops::RevertNonLoops() {
1803 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1804 bool Changed =
false;
1806 for (
auto &
MBB : *MF) {
1813 for (
auto &
I :
MBB) {
1816 else if (
I.getOpcode() == ARM::t2LoopDec)
1818 else if (
I.getOpcode() == ARM::t2LoopEnd)
1820 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1822 else if (
I.getOpcode() == ARM::MQPRCopy)
1832 for (
auto *Start : Starts) {
1838 for (
auto *Dec : Decs)
1841 for (
auto *
End : Ends)
1843 for (
auto *
End : EndDecs)
1844 RevertLoopEndDec(
End);
1845 for (
auto *
MI : MQPRCopies) {
1847 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1850 MI->getOperand(0).getReg())
1851 .
add(
MI->getOperand(1))
1852 .
add(
MI->getOperand(1));
1854 MI->eraseFromParent();
1861 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
static bool isDomainMVE(MachineInstr *MI)
SmallPtrSet< MachineInstr *, 2 > Uses
static bool isVectorPredicated(MachineInstr *MI)
ReachingDefAnalysis & RDA
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
ReachingDefAnalysis InstSet & ToRemove
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefAnalysis InstSet InstSet & Ignore
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
void reset()
Re-run the analysis.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Pair of physical register and lane mask.