File: | llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp |
Warning: | line 1049, column 44 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// \file | |||
9 | /// Finalize v8.1-m low-overhead loops by converting the associated pseudo | |||
10 | /// instructions into machine operations. | |||
11 | /// The expectation is that the loop contains three pseudo instructions: | |||
12 | /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop | |||
13 | /// form should be in the preheader, whereas the while form should be in the | |||
14 | /// preheader's only predecessor. | |||
15 | /// - t2LoopDec - placed within the loop body. | |||
16 | /// - t2LoopEnd - the loop latch terminator. | |||
17 | /// | |||
18 | /// In addition to this, we also look for the presence of the VCTP instruction, | |||
19 | /// which determines whether we can generate the tail-predicated low-overhead | |||
20 | /// loop form. | |||
21 | /// | |||
22 | /// Assumptions and Dependencies: | |||
23 | /// Low-overhead loops are constructed and executed using a setup instruction: | |||
24 | /// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP. | |||
25 | /// WLS(TP) and LE(TP) are branching instructions with a (large) limited range | |||
26 | /// but fixed polarity: WLS can only branch forwards and LE can only branch | |||
27 | /// backwards. These restrictions mean that this pass is dependent upon block | |||
28 | /// layout and block sizes, which is why it's the last pass to run. The same is | |||
29 | /// true for ConstantIslands, but this pass does not increase the size of the | |||
30 | /// basic blocks, nor does it change the CFG. Instructions are mainly removed | |||
31 | /// during the transform and pseudo instructions are replaced by real ones. In | |||
32 | /// some cases, when we have to revert to a 'normal' loop, we have to introduce | |||
33 | /// multiple instructions for a single pseudo (see RevertWhile and | |||
34 | /// RevertLoopEnd). To handle this situation, t2WhileLoopStart and t2LoopEnd | |||
35 | /// are defined to be as large as this maximum sequence of replacement | |||
36 | /// instructions. | |||
37 | /// | |||
38 | //===----------------------------------------------------------------------===// | |||
39 | ||||
40 | #include "ARM.h" | |||
41 | #include "ARMBaseInstrInfo.h" | |||
42 | #include "ARMBaseRegisterInfo.h" | |||
43 | #include "ARMBasicBlockInfo.h" | |||
44 | #include "ARMSubtarget.h" | |||
45 | #include "Thumb2InstrInfo.h" | |||
46 | #include "llvm/ADT/SetOperations.h" | |||
47 | #include "llvm/ADT/SmallSet.h" | |||
48 | #include "llvm/CodeGen/LivePhysRegs.h" | |||
49 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
50 | #include "llvm/CodeGen/MachineLoopInfo.h" | |||
51 | #include "llvm/CodeGen/MachineLoopUtils.h" | |||
52 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
53 | #include "llvm/CodeGen/Passes.h" | |||
54 | #include "llvm/CodeGen/ReachingDefAnalysis.h" | |||
55 | #include "llvm/MC/MCInstrDesc.h" | |||
56 | ||||
57 | using namespace llvm; | |||
58 | ||||
// Debug type tag that LLVM_DEBUG output in this file is filtered on, and the
// human-readable name returned by the pass's getPassName().
#define DEBUG_TYPE "arm-low-overhead-loops"
#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
61 | ||||
62 | namespace { | |||
63 | ||||
64 | class PostOrderLoopTraversal { | |||
65 | MachineLoop &ML; | |||
66 | MachineLoopInfo &MLI; | |||
67 | SmallPtrSet<MachineBasicBlock*, 4> Visited; | |||
68 | SmallVector<MachineBasicBlock*, 4> Order; | |||
69 | ||||
70 | public: | |||
71 | PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI) | |||
72 | : ML(ML), MLI(MLI) { } | |||
73 | ||||
74 | const SmallVectorImpl<MachineBasicBlock*> &getOrder() const { | |||
75 | return Order; | |||
76 | } | |||
77 | ||||
78 | // Visit all the blocks within the loop, as well as exit blocks and any | |||
79 | // blocks properly dominating the header. | |||
80 | void ProcessLoop() { | |||
81 | std::function<void(MachineBasicBlock*)> Search = [this, &Search] | |||
82 | (MachineBasicBlock *MBB) -> void { | |||
83 | if (Visited.count(MBB)) | |||
84 | return; | |||
85 | ||||
86 | Visited.insert(MBB); | |||
87 | for (auto *Succ : MBB->successors()) { | |||
88 | if (!ML.contains(Succ)) | |||
89 | continue; | |||
90 | Search(Succ); | |||
91 | } | |||
92 | Order.push_back(MBB); | |||
93 | }; | |||
94 | ||||
95 | // Insert exit blocks. | |||
96 | SmallVector<MachineBasicBlock*, 2> ExitBlocks; | |||
97 | ML.getExitBlocks(ExitBlocks); | |||
98 | for (auto *MBB : ExitBlocks) | |||
99 | Order.push_back(MBB); | |||
100 | ||||
101 | // Then add the loop body. | |||
102 | Search(ML.getHeader()); | |||
103 | ||||
104 | // Then try the preheader and its predecessors. | |||
105 | std::function<void(MachineBasicBlock*)> GetPredecessor = | |||
106 | [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void { | |||
107 | Order.push_back(MBB); | |||
108 | if (MBB->pred_size() == 1) | |||
109 | GetPredecessor(*MBB->pred_begin()); | |||
110 | }; | |||
111 | ||||
112 | if (auto *Preheader = ML.getLoopPreheader()) | |||
113 | GetPredecessor(Preheader); | |||
114 | else if (auto *Preheader = MLI.findLoopPreheader(&ML, true)) | |||
115 | GetPredecessor(Preheader); | |||
116 | } | |||
117 | }; | |||
118 | ||||
119 | struct PredicatedMI { | |||
120 | MachineInstr *MI = nullptr; | |||
121 | SetVector<MachineInstr*> Predicates; | |||
122 | ||||
123 | public: | |||
124 | PredicatedMI(MachineInstr *I, SetVector<MachineInstr*> &Preds) : | |||
125 | MI(I) { Predicates.insert(Preds.begin(), Preds.end()); } | |||
126 | }; | |||
127 | ||||
128 | // Represent a VPT block, a list of instructions that begins with a VPST and | |||
129 | // has a maximum of four proceeding instructions. All instructions within the | |||
130 | // block are predicated upon the vpr and we allow instructions to define the | |||
131 | // vpr within in the block too. | |||
132 | class VPTBlock { | |||
133 | std::unique_ptr<PredicatedMI> VPST; | |||
134 | PredicatedMI *Divergent = nullptr; | |||
135 | SmallVector<PredicatedMI, 4> Insts; | |||
136 | ||||
137 | public: | |||
138 | VPTBlock(MachineInstr *MI, SetVector<MachineInstr*> &Preds) { | |||
139 | VPST = std::make_unique<PredicatedMI>(MI, Preds); | |||
140 | } | |||
141 | ||||
142 | void addInst(MachineInstr *MI, SetVector<MachineInstr*> &Preds) { | |||
143 | LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Adding predicated MI: " << *MI; } } while (false); | |||
144 | if (!Divergent && !set_difference(Preds, VPST->Predicates).empty()) { | |||
145 | Divergent = &Insts.back(); | |||
146 | LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << " - has divergent predicate: " << *Divergent->MI; } } while (false); | |||
147 | } | |||
148 | Insts.emplace_back(MI, Preds); | |||
149 | assert(Insts.size() <= 4 && "Too many instructions in VPT block!")((Insts.size() <= 4 && "Too many instructions in VPT block!" ) ? static_cast<void> (0) : __assert_fail ("Insts.size() <= 4 && \"Too many instructions in VPT block!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 149, __PRETTY_FUNCTION__)); | |||
150 | } | |||
151 | ||||
152 | // Have we found an instruction within the block which defines the vpr? If | |||
153 | // so, not all the instructions in the block will have the same predicate. | |||
154 | bool HasNonUniformPredicate() const { | |||
155 | return Divergent != nullptr; | |||
156 | } | |||
157 | ||||
158 | // Is the given instruction part of the predicate set controlling the entry | |||
159 | // to the block. | |||
160 | bool IsPredicatedOn(MachineInstr *MI) const { | |||
161 | return VPST->Predicates.count(MI); | |||
162 | } | |||
163 | ||||
164 | // Is the given instruction the only predicate which controls the entry to | |||
165 | // the block. | |||
166 | bool IsOnlyPredicatedOn(MachineInstr *MI) const { | |||
167 | return IsPredicatedOn(MI) && VPST->Predicates.size() == 1; | |||
168 | } | |||
169 | ||||
170 | unsigned size() const { return Insts.size(); } | |||
171 | SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; } | |||
172 | MachineInstr *getVPST() const { return VPST->MI; } | |||
173 | PredicatedMI *getDivergent() const { return Divergent; } | |||
174 | }; | |||
175 | ||||
176 | struct LowOverheadLoop { | |||
177 | ||||
178 | MachineLoop &ML; | |||
179 | MachineLoopInfo &MLI; | |||
180 | ReachingDefAnalysis &RDA; | |||
181 | const TargetRegisterInfo &TRI; | |||
182 | MachineFunction *MF = nullptr; | |||
183 | MachineInstr *InsertPt = nullptr; | |||
184 | MachineInstr *Start = nullptr; | |||
185 | MachineInstr *Dec = nullptr; | |||
186 | MachineInstr *End = nullptr; | |||
187 | MachineInstr *VCTP = nullptr; | |||
188 | VPTBlock *CurrentBlock = nullptr; | |||
189 | SetVector<MachineInstr*> CurrentPredicate; | |||
190 | SmallVector<VPTBlock, 4> VPTBlocks; | |||
191 | SmallPtrSet<MachineInstr*, 4> ToRemove; | |||
192 | bool Revert = false; | |||
193 | bool CannotTailPredicate = false; | |||
194 | ||||
195 | LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, | |||
196 | ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI) | |||
197 | : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI) { | |||
198 | MF = ML.getHeader()->getParent(); | |||
199 | } | |||
200 | ||||
201 | // If this is an MVE instruction, check that we know how to use tail | |||
202 | // predication with it. Record VPT blocks and return whether the | |||
203 | // instruction is valid for tail predication. | |||
204 | bool ValidateMVEInst(MachineInstr *MI); | |||
205 | ||||
206 | void AnalyseMVEInst(MachineInstr *MI) { | |||
207 | CannotTailPredicate = !ValidateMVEInst(MI); | |||
208 | } | |||
209 | ||||
210 | bool IsTailPredicationLegal() const { | |||
211 | // For now, let's keep things really simple and only support a single | |||
212 | // block for tail predication. | |||
213 | return !Revert && FoundAllComponents() && VCTP && | |||
214 | !CannotTailPredicate && ML.getNumBlocks() == 1; | |||
215 | } | |||
216 | ||||
217 | // Check that the predication in the loop will be equivalent once we | |||
218 | // perform the conversion. Also ensure that we can provide the number | |||
219 | // of elements to the loop start instruction. | |||
220 | bool ValidateTailPredicate(MachineInstr *StartInsertPt); | |||
221 | ||||
222 | // Check that any values available outside of the loop will be the same | |||
223 | // after tail predication conversion. | |||
224 | bool ValidateLiveOuts() const; | |||
225 | ||||
226 | // Is it safe to define LR with DLS/WLS? | |||
227 | // LR can be defined if it is the operand to start, because it's the same | |||
228 | // value, or if it's going to be equivalent to the operand to Start. | |||
229 | MachineInstr *isSafeToDefineLR(); | |||
230 | ||||
231 | // Check the branch targets are within range and we satisfy our | |||
232 | // restrictions. | |||
233 | void CheckLegality(ARMBasicBlockUtils *BBUtils); | |||
234 | ||||
235 | bool FoundAllComponents() const { | |||
236 | return Start && Dec && End; | |||
237 | } | |||
238 | ||||
239 | SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; } | |||
240 | ||||
241 | // Return the loop iteration count, or the number of elements if we're tail | |||
242 | // predicating. | |||
243 | MachineOperand &getCount() { | |||
244 | return IsTailPredicationLegal() ? | |||
245 | VCTP->getOperand(1) : Start->getOperand(0); | |||
246 | } | |||
247 | ||||
248 | unsigned getStartOpcode() const { | |||
249 | bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; | |||
250 | if (!IsTailPredicationLegal()) | |||
251 | return IsDo ? ARM::t2DLS : ARM::t2WLS; | |||
252 | ||||
253 | return VCTPOpcodeToLSTP(VCTP->getOpcode(), IsDo); | |||
254 | } | |||
255 | ||||
256 | void dump() const { | |||
257 | if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; | |||
258 | if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; | |||
259 | if (End) dbgs() << "ARM Loops: Found Loop End: " << *End; | |||
260 | if (VCTP) dbgs() << "ARM Loops: Found VCTP: " << *VCTP; | |||
261 | if (!FoundAllComponents()) | |||
262 | dbgs() << "ARM Loops: Not a low-overhead loop.\n"; | |||
263 | else if (!(Start && Dec && End)) | |||
264 | dbgs() << "ARM Loops: Failed to find all loop components.\n"; | |||
265 | } | |||
266 | }; | |||
267 | ||||
268 | class ARMLowOverheadLoops : public MachineFunctionPass { | |||
269 | MachineFunction *MF = nullptr; | |||
270 | MachineLoopInfo *MLI = nullptr; | |||
271 | ReachingDefAnalysis *RDA = nullptr; | |||
272 | const ARMBaseInstrInfo *TII = nullptr; | |||
273 | MachineRegisterInfo *MRI = nullptr; | |||
274 | const TargetRegisterInfo *TRI = nullptr; | |||
275 | std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr; | |||
276 | ||||
277 | public: | |||
278 | static char ID; | |||
279 | ||||
280 | ARMLowOverheadLoops() : MachineFunctionPass(ID) { } | |||
281 | ||||
282 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
283 | AU.setPreservesCFG(); | |||
284 | AU.addRequired<MachineLoopInfo>(); | |||
285 | AU.addRequired<ReachingDefAnalysis>(); | |||
286 | MachineFunctionPass::getAnalysisUsage(AU); | |||
287 | } | |||
288 | ||||
289 | bool runOnMachineFunction(MachineFunction &MF) override; | |||
290 | ||||
291 | MachineFunctionProperties getRequiredProperties() const override { | |||
292 | return MachineFunctionProperties().set( | |||
293 | MachineFunctionProperties::Property::NoVRegs).set( | |||
294 | MachineFunctionProperties::Property::TracksLiveness); | |||
295 | } | |||
296 | ||||
297 | StringRef getPassName() const override { | |||
298 | return ARM_LOW_OVERHEAD_LOOPS_NAME"ARM Low Overhead Loops pass"; | |||
299 | } | |||
300 | ||||
301 | private: | |||
302 | bool ProcessLoop(MachineLoop *ML); | |||
303 | ||||
304 | bool RevertNonLoops(); | |||
305 | ||||
306 | void RevertWhile(MachineInstr *MI) const; | |||
307 | ||||
308 | bool RevertLoopDec(MachineInstr *MI) const; | |||
309 | ||||
310 | void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; | |||
311 | ||||
312 | void ConvertVPTBlocks(LowOverheadLoop &LoLoop); | |||
313 | ||||
314 | MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop); | |||
315 | ||||
316 | void Expand(LowOverheadLoop &LoLoop); | |||
317 | ||||
318 | void IterationCountDCE(LowOverheadLoop &LoLoop); | |||
319 | }; | |||
320 | } | |||
321 | ||||
322 | char ARMLowOverheadLoops::ID = 0; | |||
323 | ||||
324 | INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,static void *initializeARMLowOverheadLoopsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "ARM Low Overhead Loops pass" , "arm-low-overhead-loops", &ARMLowOverheadLoops::ID, PassInfo ::NormalCtor_t(callDefaultCtor<ARMLowOverheadLoops>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeARMLowOverheadLoopsPassFlag; void llvm ::initializeARMLowOverheadLoopsPass(PassRegistry &Registry ) { llvm::call_once(InitializeARMLowOverheadLoopsPassFlag, initializeARMLowOverheadLoopsPassOnce , std::ref(Registry)); } | |||
325 | false, false)static void *initializeARMLowOverheadLoopsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "ARM Low Overhead Loops pass" , "arm-low-overhead-loops", &ARMLowOverheadLoops::ID, PassInfo ::NormalCtor_t(callDefaultCtor<ARMLowOverheadLoops>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeARMLowOverheadLoopsPassFlag; void llvm ::initializeARMLowOverheadLoopsPass(PassRegistry &Registry ) { llvm::call_once(InitializeARMLowOverheadLoopsPassFlag, initializeARMLowOverheadLoopsPassOnce , std::ref(Registry)); } | |||
326 | ||||
327 | MachineInstr *LowOverheadLoop::isSafeToDefineLR() { | |||
328 | // We can define LR because LR already contains the same value. | |||
329 | if (Start->getOperand(0).getReg() == ARM::LR) | |||
330 | return Start; | |||
331 | ||||
332 | unsigned CountReg = Start->getOperand(0).getReg(); | |||
333 | auto IsMoveLR = [&CountReg](MachineInstr *MI) { | |||
334 | return MI->getOpcode() == ARM::tMOVr && | |||
335 | MI->getOperand(0).getReg() == ARM::LR && | |||
336 | MI->getOperand(1).getReg() == CountReg && | |||
337 | MI->getOperand(2).getImm() == ARMCC::AL; | |||
338 | }; | |||
339 | ||||
340 | MachineBasicBlock *MBB = Start->getParent(); | |||
341 | ||||
342 | // Find an insertion point: | |||
343 | // - Is there a (mov lr, Count) before Start? If so, and nothing else writes | |||
344 | // to Count before Start, we can insert at that mov. | |||
345 | if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) | |||
346 | if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) | |||
347 | return LRDef; | |||
348 | ||||
349 | // - Is there a (mov lr, Count) after Start? If so, and nothing else writes | |||
350 | // to Count after Start, we can insert at that mov. | |||
351 | if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) | |||
352 | if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) | |||
353 | return LRDef; | |||
354 | ||||
355 | // We've found no suitable LR def and Start doesn't use LR directly. Can we | |||
356 | // just define LR anyway? | |||
357 | return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr; | |||
358 | } | |||
359 | ||||
360 | bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { | |||
361 | assert(VCTP && "VCTP instruction expected but is not set")((VCTP && "VCTP instruction expected but is not set") ? static_cast<void> (0) : __assert_fail ("VCTP && \"VCTP instruction expected but is not set\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 361, __PRETTY_FUNCTION__)); | |||
362 | // All predication within the loop should be based on vctp. If the block | |||
363 | // isn't predicated on entry, check whether the vctp is within the block | |||
364 | // and that all other instructions are then predicated on it. | |||
365 | for (auto &Block : VPTBlocks) { | |||
366 | if (Block.IsPredicatedOn(VCTP)) | |||
367 | continue; | |||
368 | if (!Block.HasNonUniformPredicate() || !isVCTP(Block.getDivergent()->MI)) { | |||
369 | LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found unsupported diverging predicate: " << *Block.getDivergent()->MI; } } while (false) | |||
370 | << *Block.getDivergent()->MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found unsupported diverging predicate: " << *Block.getDivergent()->MI; } } while (false); | |||
371 | return false; | |||
372 | } | |||
373 | SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts(); | |||
374 | for (auto &PredMI : Insts) { | |||
375 | if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI)) | |||
376 | continue; | |||
377 | LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI << " - which is predicated on:\n"; for (auto *MI : PredMI.Predicates) dbgs() << " - " << *MI; } } while (false) | |||
378 | << " - which is predicated on:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI << " - which is predicated on:\n"; for (auto *MI : PredMI.Predicates) dbgs() << " - " << *MI; } } while (false) | |||
379 | for (auto *MI : PredMI.Predicates)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI << " - which is predicated on:\n"; for (auto *MI : PredMI.Predicates) dbgs() << " - " << *MI; } } while (false) | |||
380 | dbgs() << " - " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI << " - which is predicated on:\n"; for (auto *MI : PredMI.Predicates) dbgs() << " - " << *MI; } } while (false); | |||
381 | return false; | |||
382 | } | |||
383 | } | |||
384 | ||||
385 | if (!ValidateLiveOuts()) | |||
386 | return false; | |||
387 | ||||
388 | // For tail predication, we need to provide the number of elements, instead | |||
389 | // of the iteration count, to the loop start instruction. The number of | |||
390 | // elements is provided to the vctp instruction, so we need to check that | |||
391 | // we can use this register at InsertPt. | |||
392 | Register NumElements = VCTP->getOperand(1).getReg(); | |||
393 | ||||
394 | // If the register is defined within loop, then we can't perform TP. | |||
395 | // TODO: Check whether this is just a mov of a register that would be | |||
396 | // available. | |||
397 | if (RDA.hasLocalDefBefore(VCTP, NumElements)) { | |||
398 | LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n" ; } } while (false); | |||
399 | return false; | |||
400 | } | |||
401 | ||||
402 | // The element count register maybe defined after InsertPt, in which case we | |||
403 | // need to try to move either InsertPt or the def so that the [w|d]lstp can | |||
404 | // use the value. | |||
405 | // TODO: On failing to move an instruction, check if the count is provided by | |||
406 | // a mov and whether we can use the mov operand directly. | |||
407 | MachineBasicBlock *InsertBB = StartInsertPt->getParent(); | |||
408 | if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) { | |||
409 | if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) { | |||
410 | if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) { | |||
411 | ElemDef->removeFromParent(); | |||
412 | InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef); | |||
413 | LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Moved element count def: " << *ElemDef; } } while (false) | |||
414 | << *ElemDef)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Moved element count def: " << *ElemDef; } } while (false); | |||
415 | } else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) { | |||
416 | StartInsertPt->removeFromParent(); | |||
417 | InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), | |||
418 | StartInsertPt); | |||
419 | LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Moved start past: " << *ElemDef; } } while (false); | |||
420 | } else { | |||
421 | LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unable to move element count to loop " << "start instruction.\n"; } } while (false) | |||
422 | << "start instruction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unable to move element count to loop " << "start instruction.\n"; } } while (false); | |||
423 | return false; | |||
424 | } | |||
425 | } | |||
426 | } | |||
427 | ||||
428 | // Especially in the case of while loops, InsertBB may not be the | |||
429 | // preheader, so we need to check that the register isn't redefined | |||
430 | // before entering the loop. | |||
431 | auto CannotProvideElements = [this](MachineBasicBlock *MBB, | |||
432 | Register NumElements) { | |||
433 | // NumElements is redefined in this block. | |||
434 | if (RDA.hasLocalDefBefore(&MBB->back(), NumElements)) | |||
435 | return true; | |||
436 | ||||
437 | // Don't continue searching up through multiple predecessors. | |||
438 | if (MBB->pred_size() > 1) | |||
439 | return true; | |||
440 | ||||
441 | return false; | |||
442 | }; | |||
443 | ||||
444 | // First, find the block that looks like the preheader. | |||
445 | MachineBasicBlock *MBB = MLI.findLoopPreheader(&ML, true); | |||
446 | if (!MBB) { | |||
447 | LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Didn't find preheader.\n" ; } } while (false); | |||
448 | return false; | |||
449 | } | |||
450 | ||||
451 | // Then search backwards for a def, until we get to InsertBB. | |||
452 | while (MBB != InsertBB) { | |||
453 | if (CannotProvideElements(MBB, NumElements)) { | |||
454 | LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unable to provide element count.\n" ; } } while (false); | |||
455 | return false; | |||
456 | } | |||
457 | MBB = *MBB->pred_begin(); | |||
458 | } | |||
459 | ||||
460 | // Check that the value change of the element count is what we expect and | |||
461 | // that the predication will be equivalent. For this we need: | |||
462 | // NumElements = NumElements - VectorWidth. The sub will be a sub immediate | |||
463 | // and we can also allow register copies within the chain too. | |||
464 | auto IsValidSub = [](MachineInstr *MI, unsigned ExpectedVecWidth) { | |||
465 | unsigned ImmOpIdx = 0; | |||
466 | switch (MI->getOpcode()) { | |||
467 | default: | |||
468 | llvm_unreachable("unhandled sub opcode")::llvm::llvm_unreachable_internal("unhandled sub opcode", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 468); | |||
469 | case ARM::tSUBi3: | |||
470 | case ARM::tSUBi8: | |||
471 | ImmOpIdx = 3; | |||
472 | break; | |||
473 | case ARM::t2SUBri: | |||
474 | case ARM::t2SUBri12: | |||
475 | ImmOpIdx = 2; | |||
476 | break; | |||
477 | } | |||
478 | return MI->getOperand(ImmOpIdx).getImm() == ExpectedVecWidth; | |||
479 | }; | |||
480 | ||||
481 | MBB = VCTP->getParent(); | |||
482 | if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) { | |||
483 | SmallPtrSet<MachineInstr*, 2> ElementChain; | |||
484 | SmallPtrSet<MachineInstr*, 2> Ignore = { VCTP }; | |||
485 | unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); | |||
486 | ||||
487 | if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) { | |||
488 | bool FoundSub = false; | |||
489 | ||||
490 | for (auto *MI : ElementChain) { | |||
491 | if (isMovRegOpcode(MI->getOpcode())) | |||
492 | continue; | |||
493 | ||||
494 | if (isSubImmOpcode(MI->getOpcode())) { | |||
495 | if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) | |||
496 | return false; | |||
497 | FoundSub = true; | |||
498 | } else | |||
499 | return false; | |||
500 | } | |||
501 | ||||
502 | LLVM_DEBUG(dbgs() << "ARM Loops: Will remove element count chain:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove element count chain:\n" ; for (auto *MI : ElementChain) dbgs() << " - " << *MI; } } while (false) | |||
503 | for (auto *MI : ElementChain)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove element count chain:\n" ; for (auto *MI : ElementChain) dbgs() << " - " << *MI; } } while (false) | |||
504 | dbgs() << " - " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove element count chain:\n" ; for (auto *MI : ElementChain) dbgs() << " - " << *MI; } } while (false); | |||
505 | ToRemove.insert(ElementChain.begin(), ElementChain.end()); | |||
506 | } | |||
507 | } | |||
508 | return true; | |||
509 | } | |||
510 | ||||
511 | bool LowOverheadLoop::ValidateLiveOuts() const { | |||
512 | // Collect Q-regs that are live in the exit blocks. We don't collect scalars | |||
513 | // because they won't be affected by lane predication. | |||
514 | const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID); | |||
515 | SmallSet<Register, 2> LiveOuts; | |||
516 | SmallVector<MachineBasicBlock*, 2> ExitBlocks; | |||
517 | ML.getExitBlocks(ExitBlocks); | |||
518 | for (auto *MBB : ExitBlocks) | |||
519 | for (const MachineBasicBlock::RegisterMaskPair &RegMask : MBB->liveins()) | |||
520 | if (QPRs->contains(RegMask.PhysReg)) | |||
521 | LiveOuts.insert(RegMask.PhysReg); | |||
522 | ||||
523 | // Collect the instructions in the loop body that define the live-out values. | |||
524 | SmallPtrSet<MachineInstr*, 2> LiveMIs; | |||
525 | MachineBasicBlock *MBB = ML.getHeader(); | |||
526 | for (auto Reg : LiveOuts) | |||
527 | if (auto *MI = RDA.getLocalLiveOutMIDef(MBB, Reg)) | |||
528 | LiveMIs.insert(MI); | |||
529 | ||||
530 | LLVM_DEBUG(dbgs() << "ARM Loops: Found loop live-outs:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found loop live-outs:\n" ; for (auto *MI : LiveMIs) dbgs() << " - " << *MI ; } } while (false) | |||
531 | for (auto *MI : LiveMIs)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found loop live-outs:\n" ; for (auto *MI : LiveMIs) dbgs() << " - " << *MI ; } } while (false) | |||
532 | dbgs() << " - " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found loop live-outs:\n" ; for (auto *MI : LiveMIs) dbgs() << " - " << *MI ; } } while (false); | |||
533 | // We've already validated that any VPT predication within the loop will be | |||
534 | // equivalent when we perform the predication transformation; so we know that | |||
535 | // any VPT predicated instruction is predicated upon VCTP. Any live-out | |||
536 | // instruction needs to be predicated, so check this here. | |||
537 | for (auto *MI : LiveMIs) { | |||
538 | int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); | |||
539 | if (PIdx == -1 || MI->getOperand(PIdx+1).getReg() != ARM::VPR) | |||
540 | return false; | |||
541 | } | |||
542 | ||||
543 | return true; | |||
544 | } | |||
545 | ||||
546 | void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) { | |||
547 | if (Revert) | |||
548 | return; | |||
549 | ||||
550 | if (!End->getOperand(1).isMBB()) | |||
551 | report_fatal_error("Expected LoopEnd to target basic block"); | |||
552 | ||||
553 | // TODO Maybe there's cases where the target doesn't have to be the header, | |||
554 | // but for now be safe and revert. | |||
555 | if (End->getOperand(1).getMBB() != ML.getHeader()) { | |||
556 | LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: LoopEnd is not targetting header.\n" ; } } while (false); | |||
557 | Revert = true; | |||
558 | return; | |||
559 | } | |||
560 | ||||
561 | // The WLS and LE instructions have 12-bits for the label offset. WLS | |||
562 | // requires a positive offset, while LE uses negative. | |||
563 | if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) || | |||
564 | !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) { | |||
565 | LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: LE offset is out-of-range\n" ; } } while (false); | |||
566 | Revert = true; | |||
567 | return; | |||
568 | } | |||
569 | ||||
570 | if (Start->getOpcode() == ARM::t2WhileLoopStart && | |||
571 | (BBUtils->getOffsetOf(Start) > | |||
572 | BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) || | |||
573 | !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) { | |||
574 | LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: WLS offset is out-of-range!\n" ; } } while (false); | |||
575 | Revert = true; | |||
576 | return; | |||
577 | } | |||
578 | ||||
579 | InsertPt = Revert ? nullptr : isSafeToDefineLR(); | |||
580 | if (!InsertPt) { | |||
581 | LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unable to find safe insertion point.\n" ; } } while (false); | |||
582 | Revert = true; | |||
583 | return; | |||
584 | } else | |||
585 | LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Start insertion point: " << *InsertPt; } } while (false); | |||
586 | ||||
587 | if (!IsTailPredicationLegal()) { | |||
588 | LLVM_DEBUG(if (!VCTP)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { if (!VCTP) dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n" ; dbgs() << "ARM Loops: Tail-predication is not valid.\n" ; } } while (false) | |||
589 | dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { if (!VCTP) dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n" ; dbgs() << "ARM Loops: Tail-predication is not valid.\n" ; } } while (false) | |||
590 | dbgs() << "ARM Loops: Tail-predication is not valid.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { if (!VCTP) dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n" ; dbgs() << "ARM Loops: Tail-predication is not valid.\n" ; } } while (false); | |||
591 | return; | |||
592 | } | |||
593 | ||||
594 | assert(ML.getBlocks().size() == 1 &&((ML.getBlocks().size() == 1 && "Shouldn't be processing a loop with more than one block" ) ? static_cast<void> (0) : __assert_fail ("ML.getBlocks().size() == 1 && \"Shouldn't be processing a loop with more than one block\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 595, __PRETTY_FUNCTION__)) | |||
595 | "Shouldn't be processing a loop with more than one block")((ML.getBlocks().size() == 1 && "Shouldn't be processing a loop with more than one block" ) ? static_cast<void> (0) : __assert_fail ("ML.getBlocks().size() == 1 && \"Shouldn't be processing a loop with more than one block\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 595, __PRETTY_FUNCTION__)); | |||
596 | CannotTailPredicate = !ValidateTailPredicate(InsertPt); | |||
597 | LLVM_DEBUG(if (CannotTailPredicate)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { if (CannotTailPredicate) dbgs() << "ARM Loops: Couldn't validate tail predicate.\n"; } } while (false) | |||
598 | dbgs() << "ARM Loops: Couldn't validate tail predicate.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { if (CannotTailPredicate) dbgs() << "ARM Loops: Couldn't validate tail predicate.\n"; } } while (false); | |||
599 | } | |||
600 | ||||
601 | bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { | |||
602 | if (CannotTailPredicate) | |||
603 | return false; | |||
604 | ||||
605 | // Only support a single vctp. | |||
606 | if (isVCTP(MI) && VCTP) | |||
607 | return false; | |||
608 | ||||
609 | // Start a new vpt block when we discover a vpt. | |||
610 | if (MI->getOpcode() == ARM::MVE_VPST) { | |||
611 | VPTBlocks.emplace_back(MI, CurrentPredicate); | |||
612 | CurrentBlock = &VPTBlocks.back(); | |||
613 | return true; | |||
614 | } else if (isVCTP(MI)) | |||
615 | VCTP = MI; | |||
616 | else if (MI->getOpcode() == ARM::MVE_VPSEL || | |||
617 | MI->getOpcode() == ARM::MVE_VPNOT) | |||
618 | return false; | |||
619 | ||||
620 | // TODO: Allow VPSEL and VPNOT, we currently cannot because: | |||
621 | // 1) It will use the VPR as a predicate operand, but doesn't have to be | |||
622 | // instead a VPT block, which means we can assert while building up | |||
623 | // the VPT block because we don't find another VPST to being a new | |||
624 | // one. | |||
625 | // 2) VPSEL still requires a VPR operand even after tail predicating, | |||
626 | // which means we can't remove it unless there is another | |||
627 | // instruction, such as vcmp, that can provide the VPR def. | |||
628 | ||||
629 | bool IsUse = false; | |||
630 | bool IsDef = false; | |||
631 | const MCInstrDesc &MCID = MI->getDesc(); | |||
632 | for (int i = MI->getNumOperands() - 1; i >= 0; --i) { | |||
633 | const MachineOperand &MO = MI->getOperand(i); | |||
634 | if (!MO.isReg() || MO.getReg() != ARM::VPR) | |||
635 | continue; | |||
636 | ||||
637 | if (MO.isDef()) { | |||
638 | CurrentPredicate.insert(MI); | |||
639 | IsDef = true; | |||
640 | } else if (ARM::isVpred(MCID.OpInfo[i].OperandType)) { | |||
641 | CurrentBlock->addInst(MI, CurrentPredicate); | |||
642 | IsUse = true; | |||
643 | } else { | |||
644 | LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found instruction using vpr: " << *MI; } } while (false); | |||
645 | return false; | |||
646 | } | |||
647 | } | |||
648 | ||||
649 | // If we find a vpr def that is not already predicated on the vctp, we've | |||
650 | // got disjoint predicates that may not be equivalent when we do the | |||
651 | // conversion. | |||
652 | if (IsDef && !IsUse && VCTP && !isVCTP(MI)) { | |||
653 | LLVM_DEBUG(dbgs() << "ARM Loops: Found disjoint vpr def: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found disjoint vpr def: " << *MI; } } while (false); | |||
654 | return false; | |||
655 | } | |||
656 | ||||
657 | uint64_t Flags = MCID.TSFlags; | |||
658 | if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) | |||
659 | return true; | |||
660 | ||||
661 | // If we find an instruction that has been marked as not valid for tail | |||
662 | // predication, only allow the instruction if it's contained within a valid | |||
663 | // VPT block. | |||
664 | if ((Flags & ARMII::ValidForTailPredication) == 0 && !IsUse) { | |||
665 | LLVM_DEBUG(dbgs() << "ARM Loops: Can't tail predicate: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Can't tail predicate: " << *MI; } } while (false); | |||
666 | return false; | |||
667 | } | |||
668 | ||||
669 | // If the instruction is already explicitly predicated, then the conversion | |||
670 | // will be fine, but ensure that all memory operations are predicated. | |||
671 | return !IsUse && MI->mayLoadOrStore() ? false : true; | |||
672 | } | |||
673 | ||||
674 | bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { | |||
675 | const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget()); | |||
676 | if (!ST.hasLOB()) | |||
677 | return false; | |||
678 | ||||
679 | MF = &mf; | |||
680 | LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"; } } while (false); | |||
681 | ||||
682 | MLI = &getAnalysis<MachineLoopInfo>(); | |||
683 | RDA = &getAnalysis<ReachingDefAnalysis>(); | |||
684 | MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); | |||
685 | MRI = &MF->getRegInfo(); | |||
686 | TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo()); | |||
687 | TRI = ST.getRegisterInfo(); | |||
688 | BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF)); | |||
689 | BBUtils->computeAllBlockSizes(); | |||
690 | BBUtils->adjustBBOffsetsAfter(&MF->front()); | |||
691 | ||||
692 | bool Changed = false; | |||
693 | for (auto ML : *MLI) { | |||
694 | if (!ML->getParentLoop()) | |||
695 | Changed |= ProcessLoop(ML); | |||
696 | } | |||
697 | Changed |= RevertNonLoops(); | |||
698 | return Changed; | |||
699 | } | |||
700 | ||||
701 | bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { | |||
702 | ||||
703 | bool Changed = false; | |||
704 | ||||
705 | // Process inner loops first. | |||
706 | for (auto I = ML->begin(), E = ML->end(); I != E; ++I) | |||
707 | Changed |= ProcessLoop(*I); | |||
708 | ||||
709 | LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
710 | if (auto *Preheader = ML->getLoopPreheader())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
711 | dbgs() << " - " << Preheader->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
712 | else if (auto *Preheader = MLI->findLoopPreheader(ML))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
713 | dbgs() << " - " << Preheader->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
714 | else if (auto *Preheader = MLI->findLoopPreheader(ML, true))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
715 | dbgs() << " - " << Preheader->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
716 | for (auto *MBB : ML->getBlocks())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
717 | dbgs() << " - " << MBB->getName() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false) | |||
718 | )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Processing loop containing:\n" ; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs () << " - " << Preheader->getName() << "\n" ; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n";; } } while (false); | |||
719 | ||||
720 | // Search the given block for a loop start instruction. If one isn't found, | |||
721 | // and there's only one predecessor block, search that one too. | |||
722 | std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = | |||
723 | [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { | |||
724 | for (auto &MI : *MBB) { | |||
725 | if (isLoopStart(MI)) | |||
726 | return &MI; | |||
727 | } | |||
728 | if (MBB->pred_size() == 1) | |||
729 | return SearchForStart(*MBB->pred_begin()); | |||
730 | return nullptr; | |||
731 | }; | |||
732 | ||||
733 | LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI); | |||
734 | // Search the preheader for the start intrinsic. | |||
735 | // FIXME: I don't see why we shouldn't be supporting multiple predecessors | |||
736 | // with potentially multiple set.loop.iterations, so we need to enable this. | |||
737 | if (auto *Preheader = ML->getLoopPreheader()) | |||
738 | LoLoop.Start = SearchForStart(Preheader); | |||
739 | else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) | |||
740 | LoLoop.Start = SearchForStart(Preheader); | |||
741 | else | |||
742 | return false; | |||
743 | ||||
744 | // Find the low-overhead loop components and decide whether or not to fall | |||
745 | // back to a normal loop. Also look for a vctp instructions and decide | |||
746 | // whether we can convert that predicate using tail predication. | |||
747 | for (auto *MBB : reverse(ML->getBlocks())) { | |||
748 | for (auto &MI : *MBB) { | |||
749 | if (MI.isDebugValue()) | |||
750 | continue; | |||
751 | else if (MI.getOpcode() == ARM::t2LoopDec) | |||
752 | LoLoop.Dec = &MI; | |||
753 | else if (MI.getOpcode() == ARM::t2LoopEnd) | |||
754 | LoLoop.End = &MI; | |||
755 | else if (isLoopStart(MI)) | |||
756 | LoLoop.Start = &MI; | |||
757 | else if (MI.getDesc().isCall()) { | |||
758 | // TODO: Though the call will require LE to execute again, does this | |||
759 | // mean we should revert? Always executing LE hopefully should be | |||
760 | // faster than performing a sub,cmp,br or even subs,br. | |||
761 | LoLoop.Revert = true; | |||
762 | LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Found call.\n" ; } } while (false); | |||
763 | } else { | |||
764 | // Record VPR defs and build up their corresponding vpt blocks. | |||
765 | // Check we know how to tail predicate any mve instructions. | |||
766 | LoLoop.AnalyseMVEInst(&MI); | |||
767 | } | |||
768 | } | |||
769 | } | |||
770 | ||||
771 | LLVM_DEBUG(LoLoop.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { LoLoop.dump(); } } while (false ); | |||
772 | if (!LoLoop.FoundAllComponents()) { | |||
773 | LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Didn't find loop start, update, end\n" ; } } while (false); | |||
774 | return false; | |||
775 | } | |||
776 | ||||
777 | // Check that the only instruction using LoopDec is LoopEnd. | |||
778 | // TODO: Check for copy chains that really have no effect. | |||
779 | SmallPtrSet<MachineInstr*, 2> Uses; | |||
780 | RDA->getReachingLocalUses(LoLoop.Dec, ARM::LR, Uses); | |||
781 | if (Uses.size() > 1 || !Uses.count(LoLoop.End)) { | |||
782 | LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unable to remove LoopDec.\n" ; } } while (false); | |||
783 | LoLoop.Revert = true; | |||
784 | } | |||
785 | LoLoop.CheckLegality(BBUtils.get()); | |||
786 | Expand(LoLoop); | |||
787 | return true; | |||
788 | } | |||
789 | ||||
790 | // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a | |||
791 | // beq that branches to the exit branch. | |||
792 | // TODO: We could also try to generate a cbz if the value in LR is also in | |||
793 | // another low register. | |||
794 | void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { | |||
795 | LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Reverting to cmp: " << *MI; } } while (false); | |||
796 | MachineBasicBlock *MBB = MI->getParent(); | |||
797 | MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), | |||
798 | TII->get(ARM::t2CMPri)); | |||
799 | MIB.add(MI->getOperand(0)); | |||
800 | MIB.addImm(0); | |||
801 | MIB.addImm(ARMCC::AL); | |||
802 | MIB.addReg(ARM::NoRegister); | |||
803 | ||||
804 | MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); | |||
805 | unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? | |||
806 | ARM::tBcc : ARM::t2Bcc; | |||
807 | ||||
808 | MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); | |||
809 | MIB.add(MI->getOperand(1)); // branch target | |||
810 | MIB.addImm(ARMCC::EQ); // condition code | |||
811 | MIB.addReg(ARM::CPSR); | |||
812 | MI->eraseFromParent(); | |||
813 | } | |||
814 | ||||
815 | bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { | |||
816 | LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Reverting to sub: " << *MI; } } while (false); | |||
817 | MachineBasicBlock *MBB = MI->getParent(); | |||
818 | SmallPtrSet<MachineInstr*, 1> Ignore; | |||
819 | for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) { | |||
820 | if (I->getOpcode() == ARM::t2LoopEnd) { | |||
821 | Ignore.insert(&*I); | |||
822 | break; | |||
823 | } | |||
824 | } | |||
825 | ||||
826 | // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. | |||
827 | bool SetFlags = RDA->isSafeToDefRegAt(MI, ARM::CPSR, Ignore); | |||
828 | ||||
829 | MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), | |||
830 | TII->get(ARM::t2SUBri)); | |||
831 | MIB.addDef(ARM::LR); | |||
832 | MIB.add(MI->getOperand(1)); | |||
833 | MIB.add(MI->getOperand(2)); | |||
834 | MIB.addImm(ARMCC::AL); | |||
835 | MIB.addReg(0); | |||
836 | ||||
837 | if (SetFlags) { | |||
838 | MIB.addReg(ARM::CPSR); | |||
839 | MIB->getOperand(5).setIsDef(true); | |||
840 | } else | |||
841 | MIB.addReg(0); | |||
842 | ||||
843 | MI->eraseFromParent(); | |||
844 | return SetFlags; | |||
845 | } | |||
846 | ||||
847 | // Generate a subs, or sub and cmp, and a branch instead of an LE. | |||
848 | void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { | |||
849 | LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI; } } while (false); | |||
850 | ||||
851 | MachineBasicBlock *MBB = MI->getParent(); | |||
852 | // Create cmp | |||
853 | if (!SkipCmp) { | |||
854 | MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), | |||
855 | TII->get(ARM::t2CMPri)); | |||
856 | MIB.addReg(ARM::LR); | |||
857 | MIB.addImm(0); | |||
858 | MIB.addImm(ARMCC::AL); | |||
859 | MIB.addReg(ARM::NoRegister); | |||
860 | } | |||
861 | ||||
862 | MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); | |||
863 | unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? | |||
864 | ARM::tBcc : ARM::t2Bcc; | |||
865 | ||||
866 | // Create bne | |||
867 | MachineInstrBuilder MIB = | |||
868 | BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); | |||
869 | MIB.add(MI->getOperand(1)); // branch target | |||
870 | MIB.addImm(ARMCC::NE); // condition code | |||
871 | MIB.addReg(ARM::CPSR); | |||
872 | MI->eraseFromParent(); | |||
873 | } | |||
874 | ||||
875 | // Perform dead code elimation on the loop iteration count setup expression. | |||
876 | // If we are tail-predicating, the number of elements to be processed is the | |||
877 | // operand of the VCTP instruction in the vector body, see getCount(), which is | |||
878 | // register $r3 in this example: | |||
879 | // | |||
880 | // $lr = big-itercount-expression | |||
881 | // .. | |||
882 | // t2DoLoopStart renamable $lr | |||
883 | // vector.body: | |||
884 | // .. | |||
885 | // $vpr = MVE_VCTP32 renamable $r3 | |||
886 | // renamable $lr = t2LoopDec killed renamable $lr, 1 | |||
887 | // t2LoopEnd renamable $lr, %vector.body | |||
888 | // tB %end | |||
889 | // | |||
890 | // What we would like achieve here is to replace the do-loop start pseudo | |||
891 | // instruction t2DoLoopStart with: | |||
892 | // | |||
893 | // $lr = MVE_DLSTP_32 killed renamable $r3 | |||
894 | // | |||
895 | // Thus, $r3 which defines the number of elements, is written to $lr, | |||
896 | // and then we want to delete the whole chain that used to define $lr, | |||
897 | // see the comment below how this chain could look like. | |||
898 | // | |||
899 | void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { | |||
900 | if (!LoLoop.IsTailPredicationLegal()) | |||
901 | return; | |||
902 | ||||
903 | LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n" ; } } while (false); | |||
904 | ||||
905 | MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 0); | |||
906 | if (!Def) { | |||
907 | LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Couldn't find iteration count.\n" ; } } while (false); | |||
908 | return; | |||
909 | } | |||
910 | ||||
911 | // Collect and remove the users of iteration count. | |||
912 | SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec, | |||
913 | LoLoop.End, LoLoop.InsertPt }; | |||
914 | SmallPtrSet<MachineInstr*, 2> Remove; | |||
915 | if (RDA->isSafeToRemove(Def, Remove, Killed)) | |||
916 | LoLoop.ToRemove.insert(Remove.begin(), Remove.end()); | |||
917 | else { | |||
918 | LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n" ; } } while (false); | |||
919 | return; | |||
920 | } | |||
921 | ||||
922 | // Collect the dead code and the MBBs in which they reside. | |||
923 | RDA->collectKilledOperands(Def, Killed); | |||
924 | SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks; | |||
925 | for (auto *MI : Killed) | |||
926 | BasicBlocks.insert(MI->getParent()); | |||
927 | ||||
928 | // Collect IT blocks in all affected basic blocks. | |||
929 | std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks; | |||
930 | for (auto *MBB : BasicBlocks) { | |||
931 | for (auto &MI : *MBB) { | |||
932 | if (MI.getOpcode() != ARM::t2IT) | |||
933 | continue; | |||
934 | RDA->getReachingLocalUses(&MI, ARM::ITSTATE, ITBlocks[&MI]); | |||
935 | } | |||
936 | } | |||
937 | ||||
938 | // If we're removing all of the instructions within an IT block, then | |||
939 | // also remove the IT instruction. | |||
940 | SmallPtrSet<MachineInstr*, 2> ModifiedITs; | |||
941 | for (auto *MI : Killed) { | |||
942 | if (MachineOperand *MO = MI->findRegisterUseOperand(ARM::ITSTATE)) { | |||
943 | MachineInstr *IT = RDA->getMIOperand(MI, *MO); | |||
944 | auto &CurrentBlock = ITBlocks[IT]; | |||
945 | CurrentBlock.erase(MI); | |||
946 | if (CurrentBlock.empty()) | |||
947 | ModifiedITs.erase(IT); | |||
948 | else | |||
949 | ModifiedITs.insert(IT); | |||
950 | } | |||
951 | } | |||
952 | ||||
953 | // Delete the killed instructions only if we don't have any IT blocks that | |||
954 | // need to be modified because we need to fixup the mask. | |||
955 | // TODO: Handle cases where IT blocks are modified. | |||
956 | if (ModifiedITs.empty()) { | |||
957 | LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove iteration count:\n" ; for (auto *MI : Killed) dbgs() << " - " << *MI; } } while (false) | |||
958 | for (auto *MI : Killed)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove iteration count:\n" ; for (auto *MI : Killed) dbgs() << " - " << *MI; } } while (false) | |||
959 | dbgs() << " - " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Will remove iteration count:\n" ; for (auto *MI : Killed) dbgs() << " - " << *MI; } } while (false); | |||
960 | LoLoop.ToRemove.insert(Killed.begin(), Killed.end()); | |||
961 | } else | |||
962 | LLVM_DEBUG(dbgs() << "ARM Loops: Would need to modify IT block(s).\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Would need to modify IT block(s).\n" ; } } while (false); | |||
963 | } | |||
964 | ||||
965 | MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { | |||
966 | LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Expanding LoopStart.\n" ; } } while (false); | |||
967 | // When using tail-predication, try to delete the dead code that was used to | |||
968 | // calculate the number of loop iterations. | |||
969 | IterationCountDCE(LoLoop); | |||
970 | ||||
971 | MachineInstr *InsertPt = LoLoop.InsertPt; | |||
972 | MachineInstr *Start = LoLoop.Start; | |||
973 | MachineBasicBlock *MBB = InsertPt->getParent(); | |||
974 | bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; | |||
975 | unsigned Opc = LoLoop.getStartOpcode(); | |||
976 | MachineOperand &Count = LoLoop.getCount(); | |||
977 | ||||
978 | MachineInstrBuilder MIB = | |||
979 | BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); | |||
980 | ||||
981 | MIB.addDef(ARM::LR); | |||
982 | MIB.add(Count); | |||
983 | if (!IsDo) | |||
984 | MIB.add(Start->getOperand(1)); | |||
985 | ||||
986 | // If we're inserting at a mov lr, then remove it as it's redundant. | |||
987 | if (InsertPt != Start) | |||
988 | LoLoop.ToRemove.insert(InsertPt); | |||
989 | LoLoop.ToRemove.insert(Start); | |||
990 | LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Inserted start: " << *MIB; } } while (false); | |||
991 | return &*MIB; | |||
992 | } | |||
993 | ||||
994 | void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { | |||
995 | auto RemovePredicate = [](MachineInstr *MI) { | |||
996 | LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Removing predicate from: " << *MI; } } while (false); | |||
997 | if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) { | |||
998 | assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&((MI->getOperand(PIdx).getImm() == ARMVCC::Then && "Expected Then predicate!") ? static_cast<void> (0) : __assert_fail ("MI->getOperand(PIdx).getImm() == ARMVCC::Then && \"Expected Then predicate!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 999, __PRETTY_FUNCTION__)) | |||
999 | "Expected Then predicate!")((MI->getOperand(PIdx).getImm() == ARMVCC::Then && "Expected Then predicate!") ? static_cast<void> (0) : __assert_fail ("MI->getOperand(PIdx).getImm() == ARMVCC::Then && \"Expected Then predicate!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 999, __PRETTY_FUNCTION__)); | |||
1000 | MI->getOperand(PIdx).setImm(ARMVCC::None); | |||
1001 | MI->getOperand(PIdx+1).setReg(0); | |||
1002 | } else | |||
1003 | llvm_unreachable("trying to unpredicate a non-predicated instruction")::llvm::llvm_unreachable_internal("trying to unpredicate a non-predicated instruction" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp" , 1003); | |||
1004 | }; | |||
1005 | ||||
1006 | // There are a few scenarios which we have to fix up: | |||
1007 | // 1) A VPT block with is only predicated by the vctp and has no internal vpr | |||
1008 | // defs. | |||
1009 | // 2) A VPT block which is only predicated by the vctp but has an internal | |||
1010 | // vpr def. | |||
1011 | // 3) A VPT block which is predicated upon the vctp as well as another vpr | |||
1012 | // def. | |||
1013 | // 4) A VPT block which is not predicated upon a vctp, but contains it and | |||
1014 | // all instructions within the block are predicated upon in. | |||
1015 | ||||
1016 | for (auto &Block : LoLoop.getVPTBlocks()) { | |||
| ||||
1017 | SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts(); | |||
1018 | if (Block.HasNonUniformPredicate()) { | |||
1019 | PredicatedMI *Divergent = Block.getDivergent(); | |||
1020 | if (isVCTP(Divergent->MI)) { | |||
1021 | // The vctp will be removed, so the size of the vpt block needs to be | |||
1022 | // modified. | |||
1023 | uint64_t Size = getARMVPTBlockMask(Block.size() - 1); | |||
1024 | Block.getVPST()->getOperand(0).setImm(Size); | |||
1025 | LLVM_DEBUG(dbgs() << "ARM Loops: Modified VPT block mask.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Modified VPT block mask.\n" ; } } while (false); | |||
1026 | } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { | |||
1027 | // The VPT block has a non-uniform predicate but it's entry is guarded | |||
1028 | // only by a vctp, which means we: | |||
1029 | // - Need to remove the original vpst. | |||
1030 | // - Then need to unpredicate any following instructions, until | |||
1031 | // we come across the divergent vpr def. | |||
1032 | // - Insert a new vpst to predicate the instruction(s) that following | |||
1033 | // the divergent vpr def. | |||
1034 | // TODO: We could be producing more VPT blocks than necessary and could | |||
1035 | // fold the newly created one into a proceeding one. | |||
1036 | for (auto I = ++MachineBasicBlock::iterator(Block.getVPST()), | |||
1037 | E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I) | |||
1038 | RemovePredicate(&*I); | |||
1039 | ||||
1040 | unsigned Size = 0; | |||
1041 | auto E = MachineBasicBlock::reverse_iterator(Divergent->MI); | |||
1042 | auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI); | |||
1043 | MachineInstr *InsertAt = nullptr; | |||
1044 | while (I != E) { | |||
1045 | InsertAt = &*I; | |||
1046 | ++Size; | |||
1047 | ++I; | |||
1048 | } | |||
1049 | MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt, | |||
| ||||
1050 | InsertAt->getDebugLoc(), | |||
1051 | TII->get(ARM::MVE_VPST)); | |||
1052 | MIB.addImm(getARMVPTBlockMask(Size)); | |||
1053 | LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST(); } } while (false); | |||
1054 | LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Created VPST: " << *MIB; } } while (false); | |||
1055 | LoLoop.ToRemove.insert(Block.getVPST()); | |||
1056 | } | |||
1057 | } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { | |||
1058 | // A vpt block which is only predicated upon vctp and has no internal vpr | |||
1059 | // defs: | |||
1060 | // - Remove vpst. | |||
1061 | // - Unpredicate the remaining instructions. | |||
1062 | LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST(); } } while (false); | |||
1063 | LoLoop.ToRemove.insert(Block.getVPST()); | |||
1064 | for (auto &PredMI : Insts) | |||
1065 | RemovePredicate(PredMI.MI); | |||
1066 | } | |||
1067 | } | |||
1068 | LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP; } } while (false); | |||
1069 | LoLoop.ToRemove.insert(LoLoop.VCTP); | |||
1070 | } | |||
1071 | ||||
1072 | void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { | |||
1073 | ||||
1074 | // Combine the LoopDec and LoopEnd instructions into LE(TP). | |||
1075 | auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) { | |||
1076 | MachineInstr *End = LoLoop.End; | |||
1077 | MachineBasicBlock *MBB = End->getParent(); | |||
1078 | unsigned Opc = LoLoop.IsTailPredicationLegal() ? | |||
1079 | ARM::MVE_LETP : ARM::t2LEUpdate; | |||
1080 | MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(), | |||
1081 | TII->get(Opc)); | |||
1082 | MIB.addDef(ARM::LR); | |||
1083 | MIB.add(End->getOperand(0)); | |||
1084 | MIB.add(End->getOperand(1)); | |||
1085 | LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Inserted LE: " << *MIB; } } while (false); | |||
1086 | LoLoop.ToRemove.insert(LoLoop.Dec); | |||
1087 | LoLoop.ToRemove.insert(End); | |||
1088 | return &*MIB; | |||
1089 | }; | |||
1090 | ||||
1091 | // TODO: We should be able to automatically remove these branches before we | |||
1092 | // get here - probably by teaching analyzeBranch about the pseudo | |||
1093 | // instructions. | |||
1094 | // If there is an unconditional branch, after I, that just branches to the | |||
1095 | // next block, remove it. | |||
1096 | auto RemoveDeadBranch = [](MachineInstr *I) { | |||
1097 | MachineBasicBlock *BB = I->getParent(); | |||
1098 | MachineInstr *Terminator = &BB->instr_back(); | |||
1099 | if (Terminator->isUnconditionalBranch() && I != Terminator) { | |||
1100 | MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); | |||
1101 | if (BB->isLayoutSuccessor(Succ)) { | |||
1102 | LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Removing branch: " << *Terminator; } } while (false); | |||
1103 | Terminator->eraseFromParent(); | |||
1104 | } | |||
1105 | } | |||
1106 | }; | |||
1107 | ||||
1108 | if (LoLoop.Revert) { | |||
1109 | if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart) | |||
1110 | RevertWhile(LoLoop.Start); | |||
1111 | else | |||
1112 | LoLoop.Start->eraseFromParent(); | |||
1113 | bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec); | |||
1114 | RevertLoopEnd(LoLoop.End, FlagsAlreadySet); | |||
1115 | } else { | |||
1116 | LoLoop.Start = ExpandLoopStart(LoLoop); | |||
1117 | RemoveDeadBranch(LoLoop.Start); | |||
1118 | LoLoop.End = ExpandLoopEnd(LoLoop); | |||
1119 | RemoveDeadBranch(LoLoop.End); | |||
1120 | if (LoLoop.IsTailPredicationLegal()) | |||
1121 | ConvertVPTBlocks(LoLoop); | |||
1122 | for (auto *I : LoLoop.ToRemove) { | |||
1123 | LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Erasing " << *I; } } while (false); | |||
1124 | I->eraseFromParent(); | |||
1125 | } | |||
1126 | } | |||
1127 | ||||
1128 | PostOrderLoopTraversal DFS(LoLoop.ML, *MLI); | |||
1129 | DFS.ProcessLoop(); | |||
1130 | const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder(); | |||
1131 | for (auto *MBB : PostOrder) { | |||
1132 | recomputeLiveIns(*MBB); | |||
1133 | // FIXME: For some reason, the live-in print order is non-deterministic for | |||
1134 | // our tests and I can't out why... So just sort them. | |||
1135 | MBB->sortUniqueLiveIns(); | |||
1136 | } | |||
1137 | ||||
1138 | for (auto *MBB : reverse(PostOrder)) | |||
1139 | recomputeLivenessFlags(*MBB); | |||
1140 | ||||
1141 | // We've moved, removed and inserted new instructions, so update RDA. | |||
1142 | RDA->reset(); | |||
1143 | } | |||
1144 | ||||
1145 | bool ARMLowOverheadLoops::RevertNonLoops() { | |||
1146 | LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("arm-low-overhead-loops")) { dbgs() << "ARM Loops: Reverting any remaining pseudos...\n" ; } } while (false); | |||
1147 | bool Changed = false; | |||
1148 | ||||
1149 | for (auto &MBB : *MF) { | |||
1150 | SmallVector<MachineInstr*, 4> Starts; | |||
1151 | SmallVector<MachineInstr*, 4> Decs; | |||
1152 | SmallVector<MachineInstr*, 4> Ends; | |||
1153 | ||||
1154 | for (auto &I : MBB) { | |||
1155 | if (isLoopStart(I)) | |||
1156 | Starts.push_back(&I); | |||
1157 | else if (I.getOpcode() == ARM::t2LoopDec) | |||
1158 | Decs.push_back(&I); | |||
1159 | else if (I.getOpcode() == ARM::t2LoopEnd) | |||
1160 | Ends.push_back(&I); | |||
1161 | } | |||
1162 | ||||
1163 | if (Starts.empty() && Decs.empty() && Ends.empty()) | |||
1164 | continue; | |||
1165 | ||||
1166 | Changed = true; | |||
1167 | ||||
1168 | for (auto *Start : Starts) { | |||
1169 | if (Start->getOpcode() == ARM::t2WhileLoopStart) | |||
1170 | RevertWhile(Start); | |||
1171 | else | |||
1172 | Start->eraseFromParent(); | |||
1173 | } | |||
1174 | for (auto *Dec : Decs) | |||
1175 | RevertLoopDec(Dec); | |||
1176 | ||||
1177 | for (auto *End : Ends) | |||
1178 | RevertLoopEnd(End); | |||
1179 | } | |||
1180 | return Changed; | |||
1181 | } | |||
1182 | ||||
1183 | FunctionPass *llvm::createARMLowOverheadLoopsPass() { | |||
1184 | return new ARMLowOverheadLoops(); | |||
1185 | } |
1 | //===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H |
14 | #define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H |
15 | |
16 | #include "MCTargetDesc/ARMBaseInfo.h" |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/ADT/SmallSet.h" |
19 | #include "llvm/CodeGen/MachineBasicBlock.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineOperand.h" |
23 | #include "llvm/CodeGen/TargetInstrInfo.h" |
24 | #include <array> |
25 | #include <cstdint> |
26 | |
27 | #define GET_INSTRINFO_HEADER |
28 | #include "ARMGenInstrInfo.inc" |
29 | |
30 | namespace llvm { |
31 | |
32 | class ARMBaseRegisterInfo; |
33 | class ARMSubtarget; |
34 | |
35 | class ARMBaseInstrInfo : public ARMGenInstrInfo { |
36 | const ARMSubtarget &Subtarget; |
37 | |
38 | protected: |
39 | // Can be only subclassed. |
40 | explicit ARMBaseInstrInfo(const ARMSubtarget &STI); |
41 | |
42 | void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
43 | unsigned LoadImmOpc, unsigned LoadOpc) const; |
44 | |
45 | /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI |
46 | /// and \p DefIdx. |
47 | /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of |
48 | /// the list is modeled as <Reg:SubReg, SubIdx>. |
49 | /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce |
50 | /// two elements: |
51 | /// - %1:sub1, sub0 |
52 | /// - %2<:0>, sub1 |
53 | /// |
54 | /// \returns true if it is possible to build such an input sequence |
55 | /// with the pair \p MI, \p DefIdx. False otherwise. |
56 | /// |
57 | /// \pre MI.isRegSequenceLike(). |
58 | bool getRegSequenceLikeInputs( |
59 | const MachineInstr &MI, unsigned DefIdx, |
60 | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override; |
61 | |
62 | /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI |
63 | /// and \p DefIdx. |
64 | /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. |
65 | /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: |
66 | /// - %1:sub1, sub0 |
67 | /// |
68 | /// \returns true if it is possible to build such an input sequence |
69 | /// with the pair \p MI, \p DefIdx. False otherwise. |
70 | /// |
71 | /// \pre MI.isExtractSubregLike(). |
72 | bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, |
73 | RegSubRegPairAndIdx &InputReg) const override; |
74 | |
75 | /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI |
76 | /// and \p DefIdx. |
77 | /// \p [out] BaseReg and \p [out] InsertedReg contain |
78 | /// the equivalent inputs of INSERT_SUBREG. |
79 | /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: |
80 | /// - BaseReg: %0:sub0 |
81 | /// - InsertedReg: %1:sub1, sub3 |
82 | /// |
83 | /// \returns true if it is possible to build such an input sequence |
84 | /// with the pair \p MI, \p DefIdx. False otherwise. |
85 | /// |
86 | /// \pre MI.isInsertSubregLike(). |
87 | bool |
88 | getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, |
89 | RegSubRegPair &BaseReg, |
90 | RegSubRegPairAndIdx &InsertedReg) const override; |
91 | |
92 | /// Commutes the operands in the given instruction. |
93 | /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. |
94 | /// |
95 | /// Do not call this method for a non-commutable instruction or for |
96 | /// non-commutable pair of operand indices OpIdx1 and OpIdx2. |
97 | /// Even though the instruction is commutable, the method may still |
98 | /// fail to commute the operands, null pointer is returned in such cases. |
99 | MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
100 | unsigned OpIdx1, |
101 | unsigned OpIdx2) const override; |
102 | /// If the specific machine instruction is an instruction that moves/copies |
103 | /// value from one register to another register return destination and source |
104 | /// registers as machine operands. |
105 | Optional<DestSourcePair> |
106 | isCopyInstrImpl(const MachineInstr &MI) const override; |
107 | |
108 | /// Specialization of \ref TargetInstrInfo::describeLoadedValue, used to |
109 | /// enhance debug entry value descriptions for ARM targets. |
110 | Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI, |
111 | Register Reg) const override; |
112 | |
113 | public: |
114 | // Return whether the target has an explicit NOP encoding. |
115 | bool hasNOP() const; |
116 | |
117 | // Return the non-pre/post incrementing version of 'Opc'. Return 0 |
118 | // if there is not such an opcode. |
119 | virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; |
120 | |
121 | MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, |
122 | MachineInstr &MI, |
123 | LiveVariables *LV) const override; |
124 | |
125 | virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; |
126 | const ARMSubtarget &getSubtarget() const { return Subtarget; } |
127 | |
128 | ScheduleHazardRecognizer * |
129 | CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
130 | const ScheduleDAG *DAG) const override; |
131 | |
132 | ScheduleHazardRecognizer * |
133 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
134 | const ScheduleDAG *DAG) const override; |
135 | |
136 | // Branch analysis. |
137 | bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
138 | MachineBasicBlock *&FBB, |
139 | SmallVectorImpl<MachineOperand> &Cond, |
140 | bool AllowModify = false) const override; |
141 | unsigned removeBranch(MachineBasicBlock &MBB, |
142 | int *BytesRemoved = nullptr) const override; |
143 | unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, |
144 | MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, |
145 | const DebugLoc &DL, |
146 | int *BytesAdded = nullptr) const override; |
147 | |
148 | bool |
149 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; |
150 | |
151 | // Predication support. |
152 | bool isPredicated(const MachineInstr &MI) const override; |
153 | |
154 | // MIR printer helper function to annotate Operands with a comment. |
155 | std::string createMIROperandComment(const MachineInstr &MI, |
156 | const MachineOperand &Op, |
157 | unsigned OpIdx) const override; |
158 | |
159 | ARMCC::CondCodes getPredicate(const MachineInstr &MI) const { |
160 | int PIdx = MI.findFirstPredOperandIdx(); |
161 | return PIdx != -1 ? (ARMCC::CondCodes)MI.getOperand(PIdx).getImm() |
162 | : ARMCC::AL; |
163 | } |
164 | |
165 | bool PredicateInstruction(MachineInstr &MI, |
166 | ArrayRef<MachineOperand> Pred) const override; |
167 | |
168 | bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
169 | ArrayRef<MachineOperand> Pred2) const override; |
170 | |
171 | bool DefinesPredicate(MachineInstr &MI, |
172 | std::vector<MachineOperand> &Pred) const override; |
173 | |
174 | bool isPredicable(const MachineInstr &MI) const override; |
175 | |
176 | // CPSR defined in instruction |
177 | static bool isCPSRDefined(const MachineInstr &MI); |
178 | bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; |
179 | bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; |
180 | |
181 | // Load, scaled register offset |
182 | bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const; |
183 | // Load, scaled register offset, not plus LSL2 |
184 | bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const; |
185 | // Minus reg for ldstso addr mode |
186 | bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const; |
187 | // Scaled register offset in address mode 2 |
188 | bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const; |
189 | // Load multiple, base reg in list |
190 | bool isLDMBaseRegInList(const MachineInstr &MI) const; |
191 | // get LDM variable defs size |
192 | unsigned getLDMVariableDefsSize(const MachineInstr &MI) const; |
193 | |
194 | /// GetInstSize - Returns the size of the specified MachineInstr. |
195 | /// |
196 | unsigned getInstSizeInBytes(const MachineInstr &MI) const override; |
197 | |
198 | unsigned isLoadFromStackSlot(const MachineInstr &MI, |
199 | int &FrameIndex) const override; |
200 | unsigned isStoreToStackSlot(const MachineInstr &MI, |
201 | int &FrameIndex) const override; |
202 | unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, |
203 | int &FrameIndex) const override; |
204 | unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, |
205 | int &FrameIndex) const override; |
206 | |
207 | void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
208 | unsigned SrcReg, bool KillSrc, |
209 | const ARMSubtarget &Subtarget) const; |
210 | void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
211 | unsigned DestReg, bool KillSrc, |
212 | const ARMSubtarget &Subtarget) const; |
213 | |
214 | void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
215 | const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, |
216 | bool KillSrc) const override; |
217 | |
218 | void storeRegToStackSlot(MachineBasicBlock &MBB, |
219 | MachineBasicBlock::iterator MBBI, |
220 | Register SrcReg, bool isKill, int FrameIndex, |
221 | const TargetRegisterClass *RC, |
222 | const TargetRegisterInfo *TRI) const override; |
223 | |
224 | void loadRegFromStackSlot(MachineBasicBlock &MBB, |
225 | MachineBasicBlock::iterator MBBI, |
226 | Register DestReg, int FrameIndex, |
227 | const TargetRegisterClass *RC, |
228 | const TargetRegisterInfo *TRI) const override; |
229 | |
230 | bool expandPostRAPseudo(MachineInstr &MI) const override; |
231 | |
232 | bool shouldSink(const MachineInstr &MI) const override; |
233 | |
234 | void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
235 | unsigned DestReg, unsigned SubIdx, |
236 | const MachineInstr &Orig, |
237 | const TargetRegisterInfo &TRI) const override; |
238 | |
239 | MachineInstr & |
240 | duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, |
241 | const MachineInstr &Orig) const override; |
242 | |
243 | const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
244 | unsigned SubIdx, unsigned State, |
245 | const TargetRegisterInfo *TRI) const; |
246 | |
247 | bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, |
248 | const MachineRegisterInfo *MRI) const override; |
249 | |
250 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
251 | /// determine if two loads are loading from the same base address. It should |
252 | /// only return true if the base pointers are the same and the only |
253 | /// differences between the two addresses is the offset. It also returns the |
254 | /// offsets by reference. |
255 | bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, |
256 | int64_t &Offset2) const override; |
257 | |
258 | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to |
259 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads |
260 | /// should be scheduled together. On some targets if two loads are loading from |
261 | /// addresses in the same cache line, it's better if they are scheduled |
262 | /// together. This function takes two integers that represent the load offsets |
263 | /// from the common base address. It returns true if it decides it's desirable |
264 | /// to schedule the two loads together. "NumLoads" is the number of loads that |
265 | /// have already been scheduled after Load1. |
266 | bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
267 | int64_t Offset1, int64_t Offset2, |
268 | unsigned NumLoads) const override; |
269 | |
270 | bool isSchedulingBoundary(const MachineInstr &MI, |
271 | const MachineBasicBlock *MBB, |
272 | const MachineFunction &MF) const override; |
273 | |
274 | bool isProfitableToIfCvt(MachineBasicBlock &MBB, |
275 | unsigned NumCycles, unsigned ExtraPredCycles, |
276 | BranchProbability Probability) const override; |
277 | |
278 | bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, |
279 | unsigned ExtraT, MachineBasicBlock &FMBB, |
280 | unsigned NumF, unsigned ExtraF, |
281 | BranchProbability Probability) const override; |
282 | |
283 | bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, |
284 | BranchProbability Probability) const override { |
285 | return NumCycles == 1; |
286 | } |
287 | |
288 | unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, |
289 | unsigned NumInsts) const override; |
290 | unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override; |
291 | |
292 | bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
293 | MachineBasicBlock &FMBB) const override; |
294 | |
295 | /// analyzeCompare - For a comparison instruction, return the source registers |
296 | /// in SrcReg and SrcReg2 if having two register operands, and the value it |
297 | /// compares against in CmpValue. Return true if the comparison instruction |
298 | /// can be analyzed. |
299 | bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, |
300 | unsigned &SrcReg2, int &CmpMask, |
301 | int &CmpValue) const override; |
302 | |
303 | /// optimizeCompareInstr - Convert the instruction to set the zero flag so |
304 | /// that we can remove a "comparison with zero"; Remove a redundant CMP |
305 | /// instruction if the flags can be updated in the same way by an earlier |
306 | /// instruction such as SUB. |
307 | bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, |
308 | unsigned SrcReg2, int CmpMask, int CmpValue, |
309 | const MachineRegisterInfo *MRI) const override; |
310 | |
311 | bool analyzeSelect(const MachineInstr &MI, |
312 | SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp, |
313 | unsigned &FalseOp, bool &Optimizable) const override; |
314 | |
315 | MachineInstr *optimizeSelect(MachineInstr &MI, |
316 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
317 | bool) const override; |
318 | |
319 | /// FoldImmediate - 'Reg' is known to be defined by a move immediate |
320 | /// instruction, try to fold the immediate into the use instruction. |
321 | bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, |
322 | MachineRegisterInfo *MRI) const override; |
323 | |
324 | unsigned getNumMicroOps(const InstrItineraryData *ItinData, |
325 | const MachineInstr &MI) const override; |
326 | |
327 | int getOperandLatency(const InstrItineraryData *ItinData, |
328 | const MachineInstr &DefMI, unsigned DefIdx, |
329 | const MachineInstr &UseMI, |
330 | unsigned UseIdx) const override; |
331 | int getOperandLatency(const InstrItineraryData *ItinData, |
332 | SDNode *DefNode, unsigned DefIdx, |
333 | SDNode *UseNode, unsigned UseIdx) const override; |
334 | |
335 | /// VFP/NEON execution domains. |
336 | std::pair<uint16_t, uint16_t> |
337 | getExecutionDomain(const MachineInstr &MI) const override; |
338 | void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override; |
339 | |
340 | unsigned |
341 | getPartialRegUpdateClearance(const MachineInstr &, unsigned, |
342 | const TargetRegisterInfo *) const override; |
343 | void breakPartialRegDependency(MachineInstr &, unsigned, |
344 | const TargetRegisterInfo *TRI) const override; |
345 | |
346 | /// Get the number of addresses by LDM or VLDM or zero for unknown. |
347 | unsigned getNumLDMAddresses(const MachineInstr &MI) const; |
348 | |
349 | std::pair<unsigned, unsigned> |
350 | decomposeMachineOperandsTargetFlags(unsigned TF) const override; |
351 | ArrayRef<std::pair<unsigned, const char *>> |
352 | getSerializableDirectMachineOperandTargetFlags() const override; |
353 | ArrayRef<std::pair<unsigned, const char *>> |
354 | getSerializableBitmaskMachineOperandTargetFlags() const override; |
355 | |
356 | private: |
357 | unsigned getInstBundleLength(const MachineInstr &MI) const; |
358 | |
359 | int getVLDMDefCycle(const InstrItineraryData *ItinData, |
360 | const MCInstrDesc &DefMCID, |
361 | unsigned DefClass, |
362 | unsigned DefIdx, unsigned DefAlign) const; |
363 | int getLDMDefCycle(const InstrItineraryData *ItinData, |
364 | const MCInstrDesc &DefMCID, |
365 | unsigned DefClass, |
366 | unsigned DefIdx, unsigned DefAlign) const; |
367 | int getVSTMUseCycle(const InstrItineraryData *ItinData, |
368 | const MCInstrDesc &UseMCID, |
369 | unsigned UseClass, |
370 | unsigned UseIdx, unsigned UseAlign) const; |
371 | int getSTMUseCycle(const InstrItineraryData *ItinData, |
372 | const MCInstrDesc &UseMCID, |
373 | unsigned UseClass, |
374 | unsigned UseIdx, unsigned UseAlign) const; |
375 | int getOperandLatency(const InstrItineraryData *ItinData, |
376 | const MCInstrDesc &DefMCID, |
377 | unsigned DefIdx, unsigned DefAlign, |
378 | const MCInstrDesc &UseMCID, |
379 | unsigned UseIdx, unsigned UseAlign) const; |
380 | |
381 | int getOperandLatencyImpl(const InstrItineraryData *ItinData, |
382 | const MachineInstr &DefMI, unsigned DefIdx, |
383 | const MCInstrDesc &DefMCID, unsigned DefAdj, |
384 | const MachineOperand &DefMO, unsigned Reg, |
385 | const MachineInstr &UseMI, unsigned UseIdx, |
386 | const MCInstrDesc &UseMCID, unsigned UseAdj) const; |
387 | |
388 | unsigned getPredicationCost(const MachineInstr &MI) const override; |
389 | |
390 | unsigned getInstrLatency(const InstrItineraryData *ItinData, |
391 | const MachineInstr &MI, |
392 | unsigned *PredCost = nullptr) const override; |
393 | |
394 | int getInstrLatency(const InstrItineraryData *ItinData, |
395 | SDNode *Node) const override; |
396 | |
397 | bool hasHighOperandLatency(const TargetSchedModel &SchedModel, |
398 | const MachineRegisterInfo *MRI, |
399 | const MachineInstr &DefMI, unsigned DefIdx, |
400 | const MachineInstr &UseMI, |
401 | unsigned UseIdx) const override; |
402 | bool hasLowDefLatency(const TargetSchedModel &SchedModel, |
403 | const MachineInstr &DefMI, |
404 | unsigned DefIdx) const override; |
405 | |
406 | /// verifyInstruction - Perform target specific instruction verification. |
407 | bool verifyInstruction(const MachineInstr &MI, |
408 | StringRef &ErrInfo) const override; |
409 | |
410 | virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI) const = 0; |
411 | |
412 | void expandMEMCPY(MachineBasicBlock::iterator) const; |
413 | |
414 | /// Identify instructions that can be folded into a MOVCC instruction, and |
415 | /// return the defining instruction. |
416 | MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, |
417 | const TargetInstrInfo *TII) const; |
418 | |
419 | private: |
420 | /// Modeling special VFP / NEON fp MLA / MLS hazards. |
421 | |
422 | /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal |
423 | /// MLx table. |
424 | DenseMap<unsigned, unsigned> MLxEntryMap; |
425 | |
426 | /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause |
427 | /// stalls when scheduled together with fp MLA / MLS opcodes. |
428 | SmallSet<unsigned, 16> MLxHazardOpcodes; |
429 | |
430 | public: |
431 | /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS |
432 | /// instruction. |
433 | bool isFpMLxInstruction(unsigned Opcode) const { |
434 | return MLxEntryMap.count(Opcode); |
435 | } |
436 | |
437 | /// isFpMLxInstruction - This version also returns the multiply opcode and the |
438 | /// addition / subtraction opcode to expand to. Return true for 'HasLane' for |
439 | /// the MLX instructions with an extra lane operand. |
440 | bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
441 | unsigned &AddSubOpc, bool &NegAcc, |
442 | bool &HasLane) const; |
443 | |
444 | /// canCauseFpMLxStall - Return true if an instruction of the specified opcode |
445 | /// will cause stalls when scheduled after (within 4-cycle window) a fp |
446 | /// MLA / MLS instruction. |
447 | bool canCauseFpMLxStall(unsigned Opcode) const { |
448 | return MLxHazardOpcodes.count(Opcode); |
449 | } |
450 | |
451 | /// Returns true if the instruction has a shift by immediate that can be |
452 | /// executed in one cycle less. |
453 | bool isSwiftFastImmShift(const MachineInstr *MI) const; |
454 | |
455 | /// Returns predicate register associated with the given frame instruction. |
456 | unsigned getFramePred(const MachineInstr &MI) const { |
457 | assert(isFrameInstr(MI))((isFrameInstr(MI)) ? static_cast<void> (0) : __assert_fail ("isFrameInstr(MI)", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMBaseInstrInfo.h" , 457, __PRETTY_FUNCTION__)); |
458 | // Operands of ADJCALLSTACKDOWN/ADJCALLSTACKUP: |
459 | // - argument declared in the pattern: |
460 | // 0 - frame size |
461 | // 1 - arg of CALLSEQ_START/CALLSEQ_END |
462 | // 2 - predicate code (like ARMCC::AL) |
463 | // - added by predOps: |
464 | // 3 - predicate reg |
465 | return MI.getOperand(3).getReg(); |
466 | } |
467 | |
468 | Optional<RegImmPair> isAddImmediate(const MachineInstr &MI, |
469 | Register Reg) const override; |
470 | }; |
471 | |
472 | /// Get the operands corresponding to the given \p Pred value. By default, the |
473 | /// predicate register is assumed to be 0 (no register), but you can pass in a |
474 | /// \p PredReg if that is not the case. |
475 | static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred, |
476 | unsigned PredReg = 0) { |
477 | return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)), |
478 | MachineOperand::CreateReg(PredReg, false)}}; |
479 | } |
480 | |
481 | /// Get the operand corresponding to the conditional code result. By default, |
482 | /// this is 0 (no register). |
483 | static inline MachineOperand condCodeOp(unsigned CCReg = 0) { |
484 | return MachineOperand::CreateReg(CCReg, false); |
485 | } |
486 | |
487 | /// Get the operand corresponding to the conditional code result for Thumb1. |
488 | /// This operand will always refer to CPSR and it will have the Define flag set. |
489 | /// You can optionally set the Dead flag by means of \p isDead. |
490 | static inline MachineOperand t1CondCodeOp(bool isDead = false) { |
491 | return MachineOperand::CreateReg(ARM::CPSR, |
492 | /*Define*/ true, /*Implicit*/ false, |
493 | /*Kill*/ false, isDead); |
494 | } |
495 | |
496 | static inline |
497 | bool isUncondBranchOpcode(int Opc) { |
498 | return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; |
499 | } |
500 | |
501 | // This table shows the VPT instruction variants, i.e. the different |
502 | // mask field encodings, see also B5.6. Predication/conditional execution in |
503 | // the ArmARM. |
504 | |
505 | |
506 | inline static unsigned getARMVPTBlockMask(unsigned NumInsts) { |
507 | switch (NumInsts) { |
508 | case 1: |
509 | return ARMVCC::T; |
510 | case 2: |
511 | return ARMVCC::TT; |
512 | case 3: |
513 | return ARMVCC::TTT; |
514 | case 4: |
515 | return ARMVCC::TTTT; |
516 | default: |
517 | break; |
518 | }; |
519 | llvm_unreachable("Unexpected number of instruction in a VPT block")::llvm::llvm_unreachable_internal("Unexpected number of instruction in a VPT block" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMBaseInstrInfo.h" , 519); |
520 | } |
521 | |
522 | |
523 | static inline bool isVPTOpcode(int Opc) { |
524 | return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 || |
525 | Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 || |
526 | Opc == ARM::MVE_VPTv8u16 || Opc == ARM::MVE_VPTv8s16 || |
527 | Opc == ARM::MVE_VPTv4i32 || Opc == ARM::MVE_VPTv4u32 || |
528 | Opc == ARM::MVE_VPTv4s32 || Opc == ARM::MVE_VPTv4f32 || |
529 | Opc == ARM::MVE_VPTv8f16 || Opc == ARM::MVE_VPTv16i8r || |
530 | Opc == ARM::MVE_VPTv16u8r || Opc == ARM::MVE_VPTv16s8r || |
531 | Opc == ARM::MVE_VPTv8i16r || Opc == ARM::MVE_VPTv8u16r || |
532 | Opc == ARM::MVE_VPTv8s16r || Opc == ARM::MVE_VPTv4i32r || |
533 | Opc == ARM::MVE_VPTv4u32r || Opc == ARM::MVE_VPTv4s32r || |
534 | Opc == ARM::MVE_VPTv4f32r || Opc == ARM::MVE_VPTv8f16r || |
535 | Opc == ARM::MVE_VPST; |
536 | } |
537 | |
538 | static inline |
539 | unsigned VCMPOpcodeToVPT(unsigned Opcode) { |
540 | switch (Opcode) { |
541 | default: |
542 | return 0; |
543 | case ARM::MVE_VCMPf32: |
544 | return ARM::MVE_VPTv4f32; |
545 | case ARM::MVE_VCMPf16: |
546 | return ARM::MVE_VPTv8f16; |
547 | case ARM::MVE_VCMPi8: |
548 | return ARM::MVE_VPTv16i8; |
549 | case ARM::MVE_VCMPi16: |
550 | return ARM::MVE_VPTv8i16; |
551 | case ARM::MVE_VCMPi32: |
552 | return ARM::MVE_VPTv4i32; |
553 | case ARM::MVE_VCMPu8: |
554 | return ARM::MVE_VPTv16u8; |
555 | case ARM::MVE_VCMPu16: |
556 | return ARM::MVE_VPTv8u16; |
557 | case ARM::MVE_VCMPu32: |
558 | return ARM::MVE_VPTv4u32; |
559 | case ARM::MVE_VCMPs8: |
560 | return ARM::MVE_VPTv16s8; |
561 | case ARM::MVE_VCMPs16: |
562 | return ARM::MVE_VPTv8s16; |
563 | case ARM::MVE_VCMPs32: |
564 | return ARM::MVE_VPTv4s32; |
565 | |
566 | case ARM::MVE_VCMPf32r: |
567 | return ARM::MVE_VPTv4f32r; |
568 | case ARM::MVE_VCMPf16r: |
569 | return ARM::MVE_VPTv8f16r; |
570 | case ARM::MVE_VCMPi8r: |
571 | return ARM::MVE_VPTv16i8r; |
572 | case ARM::MVE_VCMPi16r: |
573 | return ARM::MVE_VPTv8i16r; |
574 | case ARM::MVE_VCMPi32r: |
575 | return ARM::MVE_VPTv4i32r; |
576 | case ARM::MVE_VCMPu8r: |
577 | return ARM::MVE_VPTv16u8r; |
578 | case ARM::MVE_VCMPu16r: |
579 | return ARM::MVE_VPTv8u16r; |
580 | case ARM::MVE_VCMPu32r: |
581 | return ARM::MVE_VPTv4u32r; |
582 | case ARM::MVE_VCMPs8r: |
583 | return ARM::MVE_VPTv16s8r; |
584 | case ARM::MVE_VCMPs16r: |
585 | return ARM::MVE_VPTv8s16r; |
586 | case ARM::MVE_VCMPs32r: |
587 | return ARM::MVE_VPTv4s32r; |
588 | } |
589 | } |
590 | |
591 | static inline |
592 | unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) { |
593 | switch (Opcode) { |
594 | default: |
595 | llvm_unreachable("unhandled vctp opcode")::llvm::llvm_unreachable_internal("unhandled vctp opcode", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMBaseInstrInfo.h" , 595); |
596 | break; |
597 | case ARM::MVE_VCTP8: |
598 | return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; |
599 | case ARM::MVE_VCTP16: |
600 | return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; |
601 | case ARM::MVE_VCTP32: |
602 | return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; |
603 | case ARM::MVE_VCTP64: |
604 | return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; |
605 | } |
606 | return 0; |
607 | } |
608 | |
609 | static inline unsigned getTailPredVectorWidth(unsigned Opcode) { |
610 | switch (Opcode) { |
611 | default: |
612 | llvm_unreachable("unhandled vctp opcode")::llvm::llvm_unreachable_internal("unhandled vctp opcode", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMBaseInstrInfo.h" , 612); |
613 | case ARM::MVE_VCTP8: return 16; |
614 | case ARM::MVE_VCTP16: return 8; |
615 | case ARM::MVE_VCTP32: return 4; |
616 | case ARM::MVE_VCTP64: return 2; |
617 | } |
618 | return 0; |
619 | } |
620 | |
621 | static inline |
622 | bool isVCTP(MachineInstr *MI) { |
623 | switch (MI->getOpcode()) { |
624 | default: |
625 | break; |
626 | case ARM::MVE_VCTP8: |
627 | case ARM::MVE_VCTP16: |
628 | case ARM::MVE_VCTP32: |
629 | case ARM::MVE_VCTP64: |
630 | return true; |
631 | } |
632 | return false; |
633 | } |
634 | |
635 | static inline |
636 | bool isLoopStart(MachineInstr &MI) { |
637 | return MI.getOpcode() == ARM::t2DoLoopStart || |
638 | MI.getOpcode() == ARM::t2WhileLoopStart; |
639 | } |
640 | |
641 | static inline |
642 | bool isCondBranchOpcode(int Opc) { |
643 | return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; |
644 | } |
645 | |
646 | static inline bool isJumpTableBranchOpcode(int Opc) { |
647 | return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm_i12 || |
648 | Opc == ARM::BR_JTm_rs || Opc == ARM::BR_JTadd || Opc == ARM::tBR_JTr || |
649 | Opc == ARM::t2BR_JT; |
650 | } |
651 | |
652 | static inline |
653 | bool isIndirectBranchOpcode(int Opc) { |
654 | return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; |
655 | } |
656 | |
657 | static inline bool isPopOpcode(int Opc) { |
658 | return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET || |
659 | Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD || |
660 | Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD; |
661 | } |
662 | |
663 | static inline bool isPushOpcode(int Opc) { |
664 | return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD || |
665 | Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD; |
666 | } |
667 | |
668 | static inline bool isSubImmOpcode(int Opc) { |
669 | return Opc == ARM::SUBri || |
670 | Opc == ARM::tSUBi3 || Opc == ARM::tSUBi8 || |
671 | Opc == ARM::tSUBSi3 || Opc == ARM::tSUBSi8 || |
672 | Opc == ARM::t2SUBri || Opc == ARM::t2SUBri12 || Opc == ARM::t2SUBSri; |
673 | } |
674 | |
675 | static inline bool isMovRegOpcode(int Opc) { |
676 | return Opc == ARM::MOVr || Opc == ARM::tMOVr || Opc == ARM::t2MOVr; |
677 | } |
678 | |
679 | /// isValidCoprocessorNumber - decide whether an explicit coprocessor |
680 | /// number is legal in generic instructions like CDP. The answer can |
681 | /// vary with the subtarget. |
682 | static inline bool isValidCoprocessorNumber(unsigned Num, |
683 | const FeatureBitset& featureBits) { |
684 | // Armv8-A disallows everything *other* than 111x (CP14 and CP15). |
685 | if (featureBits[ARM::HasV8Ops] && (Num & 0xE) != 0xE) |
686 | return false; |
687 | |
688 | // Armv7 disallows 101x (CP10 and CP11), which clash with VFP/NEON. |
689 | if (featureBits[ARM::HasV7Ops] && (Num & 0xE) == 0xA) |
690 | return false; |
691 | |
692 | // Armv8.1-M also disallows 100x (CP8,CP9) and 111x (CP14,CP15) |
693 | // which clash with MVE. |
694 | if (featureBits[ARM::HasV8_1MMainlineOps] && |
695 | ((Num & 0xE) == 0x8 || (Num & 0xE) == 0xE)) |
696 | return false; |
697 | |
698 | return true; |
699 | } |
700 | |
701 | /// getInstrPredicate - If instruction is predicated, returns its predicate |
702 | /// condition, otherwise returns AL. It also returns the condition code |
703 | /// register by reference. |
704 | ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg); |
705 | |
706 | unsigned getMatchingCondBranchOpcode(unsigned Opc); |
707 | |
708 | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether |
709 | /// the instruction is encoded with an 'S' bit is determined by the optional |
710 | /// CPSR def operand. |
711 | unsigned convertAddSubFlagsOpcode(unsigned OldOpc); |
712 | |
713 | /// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of |
714 | /// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2 |
715 | /// code. |
716 | void emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
717 | MachineBasicBlock::iterator &MBBI, |
718 | const DebugLoc &dl, unsigned DestReg, |
719 | unsigned BaseReg, int NumBytes, |
720 | ARMCC::CondCodes Pred, unsigned PredReg, |
721 | const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); |
722 | |
723 | void emitT2RegPlusImmediate(MachineBasicBlock &MBB, |
724 | MachineBasicBlock::iterator &MBBI, |
725 | const DebugLoc &dl, unsigned DestReg, |
726 | unsigned BaseReg, int NumBytes, |
727 | ARMCC::CondCodes Pred, unsigned PredReg, |
728 | const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); |
729 | void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, |
730 | MachineBasicBlock::iterator &MBBI, |
731 | const DebugLoc &dl, unsigned DestReg, |
732 | unsigned BaseReg, int NumBytes, |
733 | const TargetInstrInfo &TII, |
734 | const ARMBaseRegisterInfo &MRI, |
735 | unsigned MIFlags = 0); |
736 | |
737 | /// Tries to add registers to the reglist of a given base-updating |
738 | /// push/pop instruction to adjust the stack by an additional |
739 | /// NumBytes. This can save a few bytes per function in code-size, but |
740 | /// obviously generates more memory traffic. As such, it only takes |
741 | /// effect in functions being optimised for size. |
742 | bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
743 | MachineFunction &MF, MachineInstr *MI, |
744 | unsigned NumBytes); |
745 | |
746 | /// rewriteARMFrameIndex / rewriteT2FrameIndex - |
747 | /// Rewrite MI to access 'Offset' bytes from the FP. Return false if the |
748 | /// offset could not be handled directly in MI, and return the left-over |
749 | /// portion by reference. |
750 | bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
751 | unsigned FrameReg, int &Offset, |
752 | const ARMBaseInstrInfo &TII); |
753 | |
754 | bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
755 | unsigned FrameReg, int &Offset, |
756 | const ARMBaseInstrInfo &TII, |
757 | const TargetRegisterInfo *TRI); |
758 | |
759 | /// Return true if Reg is defined between From and To |
760 | bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, |
761 | MachineBasicBlock::iterator To, |
762 | const TargetRegisterInfo *TRI); |
763 | |
764 | /// Search backwards from a tBcc to find a tCMPi8 against 0, meaning |
765 | /// we can convert them to a tCBZ or tCBNZ. Return nullptr if not found. |
766 | MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br, |
767 | const TargetRegisterInfo *TRI); |
768 | |
769 | void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB); |
770 | void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg); |
771 | |
772 | void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond); |
773 | void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, |
774 | unsigned Inactive); |
775 | |
776 | /// Returns the number of instructions required to materialize the given |
777 | /// constant in a register, or 3 if a literal pool load is needed. |
778 | /// If ForCodesize is specified, an approximate cost in bytes is returned. |
779 | unsigned ConstantMaterializationCost(unsigned Val, |
780 | const ARMSubtarget *Subtarget, |
781 | bool ForCodesize = false); |
782 | |
783 | /// Returns true if Val1 has a lower Constant Materialization Cost than Val2. |
784 | /// Uses the cost from ConstantMaterializationCost, first with ForCodesize as |
785 | /// specified. If the scores are equal, return the comparison for !ForCodesize. |
786 | bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, |
787 | const ARMSubtarget *Subtarget, |
788 | bool ForCodesize = false); |
789 | |
790 | } // end namespace llvm |
791 | |
792 | #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H |