Bug Summary

File: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
Warning: line 1049, column 44
Called C++ object pointer is null
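Editor's note: this diagnostic comes from the analyzer's core checkers (enabled via -analyzer-checker=core in the invocation recorded below) and means a member is accessed through a pointer that is null on at least one feasible path. The reduction below is a minimal, hypothetical sketch of that pattern — illustrative names only, not the flagged code at line 1049:

    // On the path where `have` is false, `p` is null, so the call through it
    // is reported as "Called C++ object pointer is null".
    struct Node {
      int value() const { return v; }
      int v = 0;
    };

    int read(Node *n, bool have) {
      Node *p = have ? n : nullptr;
      return p->value(); // warning fires here on the `!have` path
    }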

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMLowOverheadLoops.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

1//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10/// instructions into machine operations.
11/// The expectation is that the loop contains three pseudo instructions:
12/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13/// form should be in the preheader, whereas the while form should be in the
14/// preheader's only predecessor.
15/// - t2LoopDec - placed within the loop body.
16/// - t2LoopEnd - the loop latch terminator.
17///
18/// In addition to this, we also look for the presence of the VCTP instruction,
19/// which determines whether we can generate the tail-predicated low-overhead
20/// loop form.
21///
22/// Assumptions and Dependencies:
23/// Low-overhead loops are constructed and executed using a setup instruction:
24/// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP.
25/// WLS(TP) and LE(TP) are branching instructions with a (large) limited range
26/// but fixed polarity: WLS can only branch forwards and LE can only branch
27/// backwards. These restrictions mean that this pass is dependent upon block
28/// layout and block sizes, which is why it's the last pass to run. The same is
29/// true for ConstantIslands, but this pass does not increase the size of the
30/// basic blocks, nor does it change the CFG. Instructions are mainly removed
31/// during the transform and pseudo instructions are replaced by real ones. In
32/// some cases, when we have to revert to a 'normal' loop, we have to introduce
33/// multiple instructions for a single pseudo (see RevertWhile and
34/// RevertLoopEnd). To handle this situation, t2WhileLoopStart and t2LoopEnd
35/// are defined to be as large as this maximum sequence of replacement
36/// instructions.
37///
38//===----------------------------------------------------------------------===//
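Editor's note: as a plain C++ model of the tail-predicated (DLSTP/LETP) form described above, the sketch below processes elements in fixed-width chunks and clamps the final chunk the way a VCTP-generated predicate would, so no scalar remainder loop is needed. It is an illustration under stated assumptions (a hypothetical kernel, 4 lanes as for a 32-bit-element VCTP), not code from this pass:

    #include <algorithm>
    #include <cstddef>

    constexpr std::size_t VectorWidth = 4; // assumed lane count (MVE_VCTP32-like)

    void addArrays(const int *A, const int *B, int *Out, std::size_t N) {
      for (std::size_t I = 0; I < N; I += VectorWidth) {
        // The "vctp" count: how many lanes are still active this iteration.
        std::size_t Active = std::min(VectorWidth, N - I);
        for (std::size_t Lane = 0; Lane < Active; ++Lane) // excess lanes masked off
          Out[I + Lane] = A[I + Lane] + B[I + Lane];
      }
    }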
39
40#include "ARM.h"
41#include "ARMBaseInstrInfo.h"
42#include "ARMBaseRegisterInfo.h"
43#include "ARMBasicBlockInfo.h"
44#include "ARMSubtarget.h"
45#include "Thumb2InstrInfo.h"
46#include "llvm/ADT/SetOperations.h"
47#include "llvm/ADT/SmallSet.h"
48#include "llvm/CodeGen/LivePhysRegs.h"
49#include "llvm/CodeGen/MachineFunctionPass.h"
50#include "llvm/CodeGen/MachineLoopInfo.h"
51#include "llvm/CodeGen/MachineLoopUtils.h"
52#include "llvm/CodeGen/MachineRegisterInfo.h"
53#include "llvm/CodeGen/Passes.h"
54#include "llvm/CodeGen/ReachingDefAnalysis.h"
55#include "llvm/MC/MCInstrDesc.h"
56
57using namespace llvm;
58
59#define DEBUG_TYPE "arm-low-overhead-loops"
60#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
61
62namespace {
63
64 class PostOrderLoopTraversal {
65 MachineLoop &ML;
66 MachineLoopInfo &MLI;
67 SmallPtrSet<MachineBasicBlock*, 4> Visited;
68 SmallVector<MachineBasicBlock*, 4> Order;
69
70 public:
71 PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)
72 : ML(ML), MLI(MLI) { }
73
74 const SmallVectorImpl<MachineBasicBlock*> &getOrder() const {
75 return Order;
76 }
77
78 // Visit all the blocks within the loop, as well as exit blocks and any
79 // blocks properly dominating the header.
80 void ProcessLoop() {
81 std::function<void(MachineBasicBlock*)> Search = [this, &Search]
82 (MachineBasicBlock *MBB) -> void {
83 if (Visited.count(MBB))
84 return;
85
86 Visited.insert(MBB);
87 for (auto *Succ : MBB->successors()) {
88 if (!ML.contains(Succ))
89 continue;
90 Search(Succ);
91 }
92 Order.push_back(MBB);
93 };
94
95 // Insert exit blocks.
96 SmallVector<MachineBasicBlock*, 2> ExitBlocks;
97 ML.getExitBlocks(ExitBlocks);
98 for (auto *MBB : ExitBlocks)
99 Order.push_back(MBB);
100
101 // Then add the loop body.
102 Search(ML.getHeader());
103
104 // Then try the preheader and its predecessors.
105 std::function<void(MachineBasicBlock*)> GetPredecessor =
106 [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void {
107 Order.push_back(MBB);
108 if (MBB->pred_size() == 1)
109 GetPredecessor(*MBB->pred_begin());
110 };
111
112 if (auto *Preheader = ML.getLoopPreheader())
113 GetPredecessor(Preheader);
114 else if (auto *Preheader = MLI.findLoopPreheader(&ML, true))
115 GetPredecessor(Preheader);
116 }
117 };
118
119 struct PredicatedMI {
120 MachineInstr *MI = nullptr;
121 SetVector<MachineInstr*> Predicates;
122
123 public:
124 PredicatedMI(MachineInstr *I, SetVector<MachineInstr*> &Preds) :
125 MI(I) { Predicates.insert(Preds.begin(), Preds.end()); }
126 };
127
128 // Represent a VPT block, a list of instructions that begins with a VPST and
129 // has a maximum of four following instructions. All instructions within the
130 // block are predicated upon the vpr and we allow instructions to define the
131 // vpr within the block too.
132 class VPTBlock {
133 std::unique_ptr<PredicatedMI> VPST;
134 PredicatedMI *Divergent = nullptr;
135 SmallVector<PredicatedMI, 4> Insts;
136
137 public:
138 VPTBlock(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
139 VPST = std::make_unique<PredicatedMI>(MI, Preds);
140 }
141
142 void addInst(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
143 LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI);
144 if (!Divergent && !set_difference(Preds, VPST->Predicates).empty()) {
145 Divergent = &Insts.back();
146 LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI);
147 }
148 Insts.emplace_back(MI, Preds);
149 assert(Insts.size() <= 4 && "Too many instructions in VPT block!");
150 }
151
152 // Have we found an instruction within the block which defines the vpr? If
153 // so, not all the instructions in the block will have the same predicate.
154 bool HasNonUniformPredicate() const {
155 return Divergent != nullptr;
3. Assuming the condition is true
4. Returning the value 1, which participates in a condition later
156 }
157
158 // Is the given instruction part of the predicate set controlling the entry
159 // to the block.
160 bool IsPredicatedOn(MachineInstr *MI) const {
161 return VPST->Predicates.count(MI);
162 }
163
164 // Is the given instruction the only predicate which controls the entry to
165 // the block.
166 bool IsOnlyPredicatedOn(MachineInstr *MI) const {
167 return IsPredicatedOn(MI) && VPST->Predicates.size() == 1;
14. Assuming the condition is true
15. Assuming the condition is true
16. Returning the value 1, which participates in a condition later
168 }
169
170 unsigned size() const { return Insts.size(); }
171 SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; }
172 MachineInstr *getVPST() const { return VPST->MI; }
173 PredicatedMI *getDivergent() const { return Divergent; }
174 };
175
176 struct LowOverheadLoop {
177
178 MachineLoop &ML;
179 MachineLoopInfo &MLI;
180 ReachingDefAnalysis &RDA;
181 const TargetRegisterInfo &TRI;
182 MachineFunction *MF = nullptr;
183 MachineInstr *InsertPt = nullptr;
184 MachineInstr *Start = nullptr;
185 MachineInstr *Dec = nullptr;
186 MachineInstr *End = nullptr;
187 MachineInstr *VCTP = nullptr;
188 VPTBlock *CurrentBlock = nullptr;
189 SetVector<MachineInstr*> CurrentPredicate;
190 SmallVector<VPTBlock, 4> VPTBlocks;
191 SmallPtrSet<MachineInstr*, 4> ToRemove;
192 bool Revert = false;
193 bool CannotTailPredicate = false;
194
195 LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
196 ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI)
197 : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI) {
198 MF = ML.getHeader()->getParent();
199 }
200
201 // If this is an MVE instruction, check that we know how to use tail
202 // predication with it. Record VPT blocks and return whether the
203 // instruction is valid for tail predication.
204 bool ValidateMVEInst(MachineInstr *MI);
205
206 void AnalyseMVEInst(MachineInstr *MI) {
207 CannotTailPredicate = !ValidateMVEInst(MI);
208 }
209
210 bool IsTailPredicationLegal() const {
211 // For now, let's keep things really simple and only support a single
212 // block for tail predication.
213 return !Revert && FoundAllComponents() && VCTP &&
214 !CannotTailPredicate && ML.getNumBlocks() == 1;
215 }
216
217 // Check that the predication in the loop will be equivalent once we
218 // perform the conversion. Also ensure that we can provide the number
219 // of elements to the loop start instruction.
220 bool ValidateTailPredicate(MachineInstr *StartInsertPt);
221
222 // Check that any values available outside of the loop will be the same
223 // after tail predication conversion.
224 bool ValidateLiveOuts() const;
225
226 // Is it safe to define LR with DLS/WLS?
227 // LR can be defined if it is the operand to start, because it's the same
228 // value, or if it's going to be equivalent to the operand to Start.
229 MachineInstr *isSafeToDefineLR();
230
231 // Check the branch targets are within range and we satisfy our
232 // restrictions.
233 void CheckLegality(ARMBasicBlockUtils *BBUtils);
234
235 bool FoundAllComponents() const {
236 return Start && Dec && End;
237 }
238
239 SmallVectorImpl<VPTBlock> &getVPTBlocks() { return VPTBlocks; }
240
241 // Return the loop iteration count, or the number of elements if we're tail
242 // predicating.
243 MachineOperand &getCount() {
244 return IsTailPredicationLegal() ?
245 VCTP->getOperand(1) : Start->getOperand(0);
246 }
247
248 unsigned getStartOpcode() const {
249 bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
250 if (!IsTailPredicationLegal())
251 return IsDo ? ARM::t2DLS : ARM::t2WLS;
252
253 return VCTPOpcodeToLSTP(VCTP->getOpcode(), IsDo);
254 }
255
256 void dump() const {
257 if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
258 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
259 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
260 if (VCTP) dbgs() << "ARM Loops: Found VCTP: " << *VCTP;
261 if (!FoundAllComponents())
262 dbgs() << "ARM Loops: Not a low-overhead loop.\n";
263 else if (!(Start && Dec && End))
264 dbgs() << "ARM Loops: Failed to find all loop components.\n";
265 }
266 };
267
268 class ARMLowOverheadLoops : public MachineFunctionPass {
269 MachineFunction *MF = nullptr;
270 MachineLoopInfo *MLI = nullptr;
271 ReachingDefAnalysis *RDA = nullptr;
272 const ARMBaseInstrInfo *TII = nullptr;
273 MachineRegisterInfo *MRI = nullptr;
274 const TargetRegisterInfo *TRI = nullptr;
275 std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
276
277 public:
278 static char ID;
279
280 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
281
282 void getAnalysisUsage(AnalysisUsage &AU) const override {
283 AU.setPreservesCFG();
284 AU.addRequired<MachineLoopInfo>();
285 AU.addRequired<ReachingDefAnalysis>();
286 MachineFunctionPass::getAnalysisUsage(AU);
287 }
288
289 bool runOnMachineFunction(MachineFunction &MF) override;
290
291 MachineFunctionProperties getRequiredProperties() const override {
292 return MachineFunctionProperties().set(
293 MachineFunctionProperties::Property::NoVRegs).set(
294 MachineFunctionProperties::Property::TracksLiveness);
295 }
296
297 StringRef getPassName() const override {
298 return ARM_LOW_OVERHEAD_LOOPS_NAME;
299 }
300
301 private:
302 bool ProcessLoop(MachineLoop *ML);
303
304 bool RevertNonLoops();
305
306 void RevertWhile(MachineInstr *MI) const;
307
308 bool RevertLoopDec(MachineInstr *MI) const;
309
310 void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
311
312 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
313
314 MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
315
316 void Expand(LowOverheadLoop &LoLoop);
317
318 void IterationCountDCE(LowOverheadLoop &LoLoop);
319 };
320}
321
322char ARMLowOverheadLoops::ID = 0;
323
324INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
325                 false, false)
326
327MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
328 // We can define LR because LR already contains the same value.
329 if (Start->getOperand(0).getReg() == ARM::LR)
330 return Start;
331
332 unsigned CountReg = Start->getOperand(0).getReg();
333 auto IsMoveLR = [&CountReg](MachineInstr *MI) {
334 return MI->getOpcode() == ARM::tMOVr &&
335 MI->getOperand(0).getReg() == ARM::LR &&
336 MI->getOperand(1).getReg() == CountReg &&
337 MI->getOperand(2).getImm() == ARMCC::AL;
338 };
339
340 MachineBasicBlock *MBB = Start->getParent();
341
342 // Find an insertion point:
343 // - Is there a (mov lr, Count) before Start? If so, and nothing else writes
344 // to Count before Start, we can insert at that mov.
345 if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR))
346 if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
347 return LRDef;
348
349 // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
350 // to Count after Start, we can insert at that mov.
351 if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR))
352 if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
353 return LRDef;
354
355 // We've found no suitable LR def and Start doesn't use LR directly. Can we
356 // just define LR anyway?
357 return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr;
358}
359
360bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
361 assert(VCTP && "VCTP instruction expected but is not set");
362 // All predication within the loop should be based on vctp. If the block
363 // isn't predicated on entry, check whether the vctp is within the block
364 // and that all other instructions are then predicated on it.
365 for (auto &Block : VPTBlocks) {
366 if (Block.IsPredicatedOn(VCTP))
367 continue;
368 if (!Block.HasNonUniformPredicate() || !isVCTP(Block.getDivergent()->MI)) {
369 LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "
370 << *Block.getDivergent()->MI);
371 return false;
372 }
373 SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
374 for (auto &PredMI : Insts) {
375 if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI))
376 continue;
377 LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI
378 << " - which is predicated on:\n";
379 for (auto *MI : PredMI.Predicates)
380 dbgs() << " - " << *MI);
381 return false;
382 }
383 }
384
385 if (!ValidateLiveOuts())
386 return false;
387
388 // For tail predication, we need to provide the number of elements, instead
389 // of the iteration count, to the loop start instruction. The number of
390 // elements is provided to the vctp instruction, so we need to check that
391 // we can use this register at InsertPt.
392 Register NumElements = VCTP->getOperand(1).getReg();
393
394 // If the register is defined within loop, then we can't perform TP.
395 // TODO: Check whether this is just a mov of a register that would be
396 // available.
397 if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
398 LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
399 return false;
400 }
401
402 // The element count register maybe defined after InsertPt, in which case we
403 // need to try to move either InsertPt or the def so that the [w|d]lstp can
404 // use the value.
405 // TODO: On failing to move an instruction, check if the count is provided by
406 // a mov and whether we can use the mov operand directly.
407 MachineBasicBlock *InsertBB = StartInsertPt->getParent();
408 if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) {
409 if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) {
410 if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) {
411 ElemDef->removeFromParent();
412 InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef);
413 LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
414 << *ElemDef);
415 } else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) {
416 StartInsertPt->removeFromParent();
417 InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
418 StartInsertPt);
419 LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
420 } else {
421 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "
422 << "start instruction.\n");
423 return false;
424 }
425 }
426 }
427
428 // Especially in the case of while loops, InsertBB may not be the
429 // preheader, so we need to check that the register isn't redefined
430 // before entering the loop.
431 auto CannotProvideElements = [this](MachineBasicBlock *MBB,
432 Register NumElements) {
433 // NumElements is redefined in this block.
434 if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
435 return true;
436
437 // Don't continue searching up through multiple predecessors.
438 if (MBB->pred_size() > 1)
439 return true;
440
441 return false;
442 };
443
444 // First, find the block that looks like the preheader.
445 MachineBasicBlock *MBB = MLI.findLoopPreheader(&ML, true);
446 if (!MBB) {
447 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n");
448 return false;
449 }
450
451 // Then search backwards for a def, until we get to InsertBB.
452 while (MBB != InsertBB) {
453 if (CannotProvideElements(MBB, NumElements)) {
454 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
455 return false;
456 }
457 MBB = *MBB->pred_begin();
458 }
459
460 // Check that the value change of the element count is what we expect and
461 // that the predication will be equivalent. For this we need:
462 // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
463 // and we can also allow register copies within the chain too.
464 auto IsValidSub = [](MachineInstr *MI, unsigned ExpectedVecWidth) {
465 unsigned ImmOpIdx = 0;
466 switch (MI->getOpcode()) {
467 default:
468 llvm_unreachable("unhandled sub opcode");
469 case ARM::tSUBi3:
470 case ARM::tSUBi8:
471 ImmOpIdx = 3;
472 break;
473 case ARM::t2SUBri:
474 case ARM::t2SUBri12:
475 ImmOpIdx = 2;
476 break;
477 }
478 return MI->getOperand(ImmOpIdx).getImm() == ExpectedVecWidth;
479 };
480
481 MBB = VCTP->getParent();
482 if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) {
483 SmallPtrSet<MachineInstr*, 2> ElementChain;
484 SmallPtrSet<MachineInstr*, 2> Ignore = { VCTP };
485 unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
486
487 if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) {
488 bool FoundSub = false;
489
490 for (auto *MI : ElementChain) {
491 if (isMovRegOpcode(MI->getOpcode()))
492 continue;
493
494 if (isSubImmOpcode(MI->getOpcode())) {
495 if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth))
496 return false;
497 FoundSub = true;
498 } else
499 return false;
500 }
501
502 LLVM_DEBUG(dbgs() << "ARM Loops: Will remove element count chain:\n";
503 for (auto *MI : ElementChain)
504 dbgs() << " - " << *MI);
505 ToRemove.insert(ElementChain.begin(), ElementChain.end());
506 }
507 }
508 return true;
509}
510
511bool LowOverheadLoop::ValidateLiveOuts() const {
512 // Collect Q-regs that are live in the exit blocks. We don't collect scalars
513 // because they won't be affected by lane predication.
514 const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
515 SmallSet<Register, 2> LiveOuts;
516 SmallVector<MachineBasicBlock*, 2> ExitBlocks;
517 ML.getExitBlocks(ExitBlocks);
518 for (auto *MBB : ExitBlocks)
519 for (const MachineBasicBlock::RegisterMaskPair &RegMask : MBB->liveins())
520 if (QPRs->contains(RegMask.PhysReg))
521 LiveOuts.insert(RegMask.PhysReg);
522
523 // Collect the instructions in the loop body that define the live-out values.
524 SmallPtrSet<MachineInstr*, 2> LiveMIs;
525 MachineBasicBlock *MBB = ML.getHeader();
526 for (auto Reg : LiveOuts)
527 if (auto *MI = RDA.getLocalLiveOutMIDef(MBB, Reg))
528 LiveMIs.insert(MI);
529
530 LLVM_DEBUG(dbgs() << "ARM Loops: Found loop live-outs:\n";
531 for (auto *MI : LiveMIs)
532 dbgs() << " - " << *MI);
533 // We've already validated that any VPT predication within the loop will be
534 // equivalent when we perform the predication transformation; so we know that
535 // any VPT predicated instruction is predicated upon VCTP. Any live-out
536 // instruction needs to be predicated, so check this here.
537 for (auto *MI : LiveMIs) {
538 int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
539 if (PIdx == -1 || MI->getOperand(PIdx+1).getReg() != ARM::VPR)
540 return false;
541 }
542
543 return true;
544}
545
546void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) {
547 if (Revert)
548 return;
549
550 if (!End->getOperand(1).isMBB())
551 report_fatal_error("Expected LoopEnd to target basic block");
552
553 // TODO Maybe there's cases where the target doesn't have to be the header,
554 // but for now be safe and revert.
555 if (End->getOperand(1).getMBB() != ML.getHeader()) {
556 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
557 Revert = true;
558 return;
559 }
560
561 // The WLS and LE instructions have 12-bits for the label offset. WLS
562 // requires a positive offset, while LE uses negative.
563 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
564 !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
565 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
566 Revert = true;
567 return;
568 }
569
570 if (Start->getOpcode() == ARM::t2WhileLoopStart &&
571 (BBUtils->getOffsetOf(Start) >
572 BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
573 !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
574 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
575 Revert = true;
576 return;
577 }
578
579 InsertPt = Revert ? nullptr : isSafeToDefineLR();
580 if (!InsertPt) {
581 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
582 Revert = true;
583 return;
584 } else
585 LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
586
587 if (!IsTailPredicationLegal()) {
588 LLVM_DEBUG(if (!VCTP)
589 dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
590 dbgs() << "ARM Loops: Tail-predication is not valid.\n");
591 return;
592 }
593
594 assert(ML.getBlocks().size() == 1 &&
595 "Shouldn't be processing a loop with more than one block");
596 CannotTailPredicate = !ValidateTailPredicate(InsertPt);
597 LLVM_DEBUG(if (CannotTailPredicate)
598 dbgs() << "ARM Loops: Couldn't validate tail predicate.\n");
599}
600
601bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
602 if (CannotTailPredicate)
603 return false;
604
605 // Only support a single vctp.
606 if (isVCTP(MI) && VCTP)
607 return false;
608
609 // Start a new vpt block when we discover a vpt.
610 if (MI->getOpcode() == ARM::MVE_VPST) {
611 VPTBlocks.emplace_back(MI, CurrentPredicate);
612 CurrentBlock = &VPTBlocks.back();
613 return true;
614 } else if (isVCTP(MI))
615 VCTP = MI;
616 else if (MI->getOpcode() == ARM::MVE_VPSEL ||
617 MI->getOpcode() == ARM::MVE_VPNOT)
618 return false;
619
620 // TODO: Allow VPSEL and VPNOT, we currently cannot because:
621 // 1) It will use the VPR as a predicate operand, but doesn't have to be
622 // instead a VPT block, which means we can assert while building up
623 // the VPT block because we don't find another VPST to being a new
624 // one.
625 // 2) VPSEL still requires a VPR operand even after tail predicating,
626 // which means we can't remove it unless there is another
627 // instruction, such as vcmp, that can provide the VPR def.
628
629 bool IsUse = false;
630 bool IsDef = false;
631 const MCInstrDesc &MCID = MI->getDesc();
632 for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
633 const MachineOperand &MO = MI->getOperand(i);
634 if (!MO.isReg() || MO.getReg() != ARM::VPR)
635 continue;
636
637 if (MO.isDef()) {
638 CurrentPredicate.insert(MI);
639 IsDef = true;
640 } else if (ARM::isVpred(MCID.OpInfo[i].OperandType)) {
641 CurrentBlock->addInst(MI, CurrentPredicate);
642 IsUse = true;
643 } else {
644 LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
645 return false;
646 }
647 }
648
649 // If we find a vpr def that is not already predicated on the vctp, we've
650 // got disjoint predicates that may not be equivalent when we do the
651 // conversion.
652 if (IsDef && !IsUse && VCTP && !isVCTP(MI)) {
653 LLVM_DEBUG(dbgs() << "ARM Loops: Found disjoint vpr def: " << *MI);
654 return false;
655 }
656
657 uint64_t Flags = MCID.TSFlags;
658 if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
659 return true;
660
661 // If we find an instruction that has been marked as not valid for tail
662 // predication, only allow the instruction if it's contained within a valid
663 // VPT block.
664 if ((Flags & ARMII::ValidForTailPredication) == 0 && !IsUse) {
665 LLVM_DEBUG(dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
666 return false;
667 }
668
669 // If the instruction is already explicitly predicated, then the conversion
670 // will be fine, but ensure that all memory operations are predicated.
671 return !IsUse && MI->mayLoadOrStore() ? false : true;
672}
673
674bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
675 const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
676 if (!ST.hasLOB())
677 return false;
678
679 MF = &mf;
680 LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
681
682 MLI = &getAnalysis<MachineLoopInfo>();
683 RDA = &getAnalysis<ReachingDefAnalysis>();
684 MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
685 MRI = &MF->getRegInfo();
686 TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
687 TRI = ST.getRegisterInfo();
688 BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
689 BBUtils->computeAllBlockSizes();
690 BBUtils->adjustBBOffsetsAfter(&MF->front());
691
692 bool Changed = false;
693 for (auto ML : *MLI) {
694 if (!ML->getParentLoop())
695 Changed |= ProcessLoop(ML);
696 }
697 Changed |= RevertNonLoops();
698 return Changed;
699}
700
701bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
702
703 bool Changed = false;
704
705 // Process inner loops first.
706 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
707 Changed |= ProcessLoop(*I);
708
709 LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n";
710 if (auto *Preheader = ML->getLoopPreheader())
711 dbgs() << " - " << Preheader->getName() << "\n";
712 else if (auto *Preheader = MLI->findLoopPreheader(ML))
713 dbgs() << " - " << Preheader->getName() << "\n";
714 else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
715 dbgs() << " - " << Preheader->getName() << "\n";
716 for (auto *MBB : ML->getBlocks())
717 dbgs() << " - " << MBB->getName() << "\n";
718 );
719
720 // Search the given block for a loop start instruction. If one isn't found,
721 // and there's only one predecessor block, search that one too.
722 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
723 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
724 for (auto &MI : *MBB) {
725 if (isLoopStart(MI))
726 return &MI;
727 }
728 if (MBB->pred_size() == 1)
729 return SearchForStart(*MBB->pred_begin());
730 return nullptr;
731 };
732
733 LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI);
734 // Search the preheader for the start intrinsic.
735 // FIXME: I don't see why we shouldn't be supporting multiple predecessors
736 // with potentially multiple set.loop.iterations, so we need to enable this.
737 if (auto *Preheader = ML->getLoopPreheader())
738 LoLoop.Start = SearchForStart(Preheader);
739 else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
740 LoLoop.Start = SearchForStart(Preheader);
741 else
742 return false;
743
744 // Find the low-overhead loop components and decide whether or not to fall
745 // back to a normal loop. Also look for a vctp instructions and decide
746 // whether we can convert that predicate using tail predication.
747 for (auto *MBB : reverse(ML->getBlocks())) {
748 for (auto &MI : *MBB) {
749 if (MI.isDebugValue())
750 continue;
751 else if (MI.getOpcode() == ARM::t2LoopDec)
752 LoLoop.Dec = &MI;
753 else if (MI.getOpcode() == ARM::t2LoopEnd)
754 LoLoop.End = &MI;
755 else if (isLoopStart(MI))
756 LoLoop.Start = &MI;
757 else if (MI.getDesc().isCall()) {
758 // TODO: Though the call will require LE to execute again, does this
759 // mean we should revert? Always executing LE hopefully should be
760 // faster than performing a sub,cmp,br or even subs,br.
761 LoLoop.Revert = true;
762 LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
763 } else {
764 // Record VPR defs and build up their corresponding vpt blocks.
765 // Check we know how to tail predicate any mve instructions.
766 LoLoop.AnalyseMVEInst(&MI);
767 }
768 }
769 }
770
771 LLVM_DEBUG(LoLoop.dump());
772 if (!LoLoop.FoundAllComponents()) {
773 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");
774 return false;
775 }
776
777 // Check that the only instruction using LoopDec is LoopEnd.
778 // TODO: Check for copy chains that really have no effect.
779 SmallPtrSet<MachineInstr*, 2> Uses;
780 RDA->getReachingLocalUses(LoLoop.Dec, ARM::LR, Uses);
781 if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
782 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
783 LoLoop.Revert = true;
784 }
785 LoLoop.CheckLegality(BBUtils.get());
786 Expand(LoLoop);
787 return true;
788}
789
790// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
791// beq that branches to the exit branch.
792// TODO: We could also try to generate a cbz if the value in LR is also in
793// another low register.
794void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
795 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
796 MachineBasicBlock *MBB = MI->getParent();
797 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
798 TII->get(ARM::t2CMPri));
799 MIB.add(MI->getOperand(0));
800 MIB.addImm(0);
801 MIB.addImm(ARMCC::AL);
802 MIB.addReg(ARM::NoRegister);
803
804 MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
805 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
806 ARM::tBcc : ARM::t2Bcc;
807
808 MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
809 MIB.add(MI->getOperand(1)); // branch target
810 MIB.addImm(ARMCC::EQ); // condition code
811 MIB.addReg(ARM::CPSR);
812 MI->eraseFromParent();
813}
814
815bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
816 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
817 MachineBasicBlock *MBB = MI->getParent();
818 SmallPtrSet<MachineInstr*, 1> Ignore;
819 for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) {
820 if (I->getOpcode() == ARM::t2LoopEnd) {
821 Ignore.insert(&*I);
822 break;
823 }
824 }
825
826 // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
827 bool SetFlags = RDA->isSafeToDefRegAt(MI, ARM::CPSR, Ignore);
828
829 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
830 TII->get(ARM::t2SUBri));
831 MIB.addDef(ARM::LR);
832 MIB.add(MI->getOperand(1));
833 MIB.add(MI->getOperand(2));
834 MIB.addImm(ARMCC::AL);
835 MIB.addReg(0);
836
837 if (SetFlags) {
838 MIB.addReg(ARM::CPSR);
839 MIB->getOperand(5).setIsDef(true);
840 } else
841 MIB.addReg(0);
842
843 MI->eraseFromParent();
844 return SetFlags;
845}
846
847// Generate a subs, or sub and cmp, and a branch instead of an LE.
848void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
849 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
850
851 MachineBasicBlock *MBB = MI->getParent();
852 // Create cmp
853 if (!SkipCmp) {
854 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
855 TII->get(ARM::t2CMPri));
856 MIB.addReg(ARM::LR);
857 MIB.addImm(0);
858 MIB.addImm(ARMCC::AL);
859 MIB.addReg(ARM::NoRegister);
860 }
861
862 MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
863 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
864 ARM::tBcc : ARM::t2Bcc;
865
866 // Create bne
867 MachineInstrBuilder MIB =
868 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
869 MIB.add(MI->getOperand(1)); // branch target
870 MIB.addImm(ARMCC::NE); // condition code
871 MIB.addReg(ARM::CPSR);
872 MI->eraseFromParent();
873}
874
875// Perform dead code elimination on the loop iteration count setup expression.
876// If we are tail-predicating, the number of elements to be processed is the
877// operand of the VCTP instruction in the vector body, see getCount(), which is
878// register $r3 in this example:
879//
880// $lr = big-itercount-expression
881// ..
882// t2DoLoopStart renamable $lr
883// vector.body:
884// ..
885// $vpr = MVE_VCTP32 renamable $r3
886// renamable $lr = t2LoopDec killed renamable $lr, 1
887// t2LoopEnd renamable $lr, %vector.body
888// tB %end
889//
890// What we would like to achieve here is to replace the do-loop start pseudo
891// instruction t2DoLoopStart with:
892//
893// $lr = MVE_DLSTP_32 killed renamable $r3
894//
895// Thus, $r3, which defines the number of elements, is written to $lr,
896// and then we want to delete the whole chain that used to define $lr,
897// see the comment below how this chain could look like.
898//
899void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
900 if (!LoLoop.IsTailPredicationLegal())
901 return;
902
903 LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
904
905 MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 0);
906 if (!Def) {
907 LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
908 return;
909 }
910
911 // Collect and remove the users of iteration count.
912 SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
913 LoLoop.End, LoLoop.InsertPt };
914 SmallPtrSet<MachineInstr*, 2> Remove;
915 if (RDA->isSafeToRemove(Def, Remove, Killed))
916 LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
917 else {
918 LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
919 return;
920 }
921
922 // Collect the dead code and the MBBs in which they reside.
923 RDA->collectKilledOperands(Def, Killed);
924 SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
925 for (auto *MI : Killed)
926 BasicBlocks.insert(MI->getParent());
927
928 // Collect IT blocks in all affected basic blocks.
929 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
930 for (auto *MBB : BasicBlocks) {
931 for (auto &MI : *MBB) {
932 if (MI.getOpcode() != ARM::t2IT)
933 continue;
934 RDA->getReachingLocalUses(&MI, ARM::ITSTATE, ITBlocks[&MI]);
935 }
936 }
937
938 // If we're removing all of the instructions within an IT block, then
939 // also remove the IT instruction.
940 SmallPtrSet<MachineInstr*, 2> ModifiedITs;
941 for (auto *MI : Killed) {
942 if (MachineOperand *MO = MI->findRegisterUseOperand(ARM::ITSTATE)) {
943 MachineInstr *IT = RDA->getMIOperand(MI, *MO);
944 auto &CurrentBlock = ITBlocks[IT];
945 CurrentBlock.erase(MI);
946 if (CurrentBlock.empty())
947 ModifiedITs.erase(IT);
948 else
949 ModifiedITs.insert(IT);
950 }
951 }
952
953 // Delete the killed instructions only if we don't have any IT blocks that
954 // need to be modified because we need to fixup the mask.
955 // TODO: Handle cases where IT blocks are modified.
956 if (ModifiedITs.empty()) {
957 LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n";
958 for (auto *MI : Killed)
959 dbgs() << " - " << *MI);
960 LoLoop.ToRemove.insert(Killed.begin(), Killed.end());
961 } else
962 LLVM_DEBUG(dbgs() << "ARM Loops: Would need to modify IT block(s).\n");
963}
964
965MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
966 LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");
967 // When using tail-predication, try to delete the dead code that was used to
968 // calculate the number of loop iterations.
969 IterationCountDCE(LoLoop);
970
971 MachineInstr *InsertPt = LoLoop.InsertPt;
972 MachineInstr *Start = LoLoop.Start;
973 MachineBasicBlock *MBB = InsertPt->getParent();
974 bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
975 unsigned Opc = LoLoop.getStartOpcode();
976 MachineOperand &Count = LoLoop.getCount();
977
978 MachineInstrBuilder MIB =
979 BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
980
981 MIB.addDef(ARM::LR);
982 MIB.add(Count);
983 if (!IsDo)
984 MIB.add(Start->getOperand(1));
985
986 // If we're inserting at a mov lr, then remove it as it's redundant.
987 if (InsertPt != Start)
988 LoLoop.ToRemove.insert(InsertPt);
989 LoLoop.ToRemove.insert(Start);
990 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
991 return &*MIB;
992}
993
994void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
995 auto RemovePredicate = [](MachineInstr *MI) {
996 LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
997 if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) {
998 assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
999 "Expected Then predicate!");
1000 MI->getOperand(PIdx).setImm(ARMVCC::None);
1001 MI->getOperand(PIdx+1).setReg(0);
1002 } else
1003 llvm_unreachable("trying to unpredicate a non-predicated instruction");
1004 };
1005
1006 // There are a few scenarios which we have to fix up:
1007 // 1) A VPT block which is only predicated by the vctp and has no internal vpr
1008 // defs.
1009 // 2) A VPT block which is only predicated by the vctp but has an internal
1010 // vpr def.
1011 // 3) A VPT block which is predicated upon the vctp as well as another vpr
1012 // def.
1013 // 4) A VPT block which is not predicated upon a vctp, but contains it and
1014 // all instructions within the block are predicated upon it.
1015
1016 for (auto &Block : LoLoop.getVPTBlocks()) {
1. Assuming '__begin1' is not equal to '__end1'
1017 SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
1018 if (Block.HasNonUniformPredicate()) {
2. Calling 'VPTBlock::HasNonUniformPredicate'
5. Returning from 'VPTBlock::HasNonUniformPredicate'
6. Taking true branch
1019 PredicatedMI *Divergent = Block.getDivergent();
1020 if (isVCTP(Divergent->MI)) {
7. Calling 'isVCTP'
11. Returning from 'isVCTP'
12. Taking false branch
1021 // The vctp will be removed, so the size of the vpt block needs to be
1022 // modified.
1023 uint64_t Size = getARMVPTBlockMask(Block.size() - 1);
1024 Block.getVPST()->getOperand(0).setImm(Size);
1025 LLVM_DEBUG(dbgs() << "ARM Loops: Modified VPT block mask.\n");
1026 } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
13. Calling 'VPTBlock::IsOnlyPredicatedOn'
17. Returning from 'VPTBlock::IsOnlyPredicatedOn'
18. Taking true branch
1027 // The VPT block has a non-uniform predicate but its entry is guarded
1028 // only by a vctp, which means we:
1029 // - Need to remove the original vpst.
1030 // - Then need to unpredicate any following instructions, until
1031 // we come across the divergent vpr def.
1032 // - Insert a new vpst to predicate the instruction(s) that follow
1033 // the divergent vpr def.
1034 // TODO: We could be producing more VPT blocks than necessary and could
1035 // fold the newly created one into a preceding one.
1036 for (auto I = ++MachineBasicBlock::iterator(Block.getVPST()),
19. Loop condition is false. Execution continues on line 1040
1037 E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
1038 RemovePredicate(&*I);
1039
1040 unsigned Size = 0;
1041 auto E = MachineBasicBlock::reverse_iterator(Divergent->MI);
1042 auto I = MachineBasicBlock::reverse_iterator(Insts.back().MI);
1043 MachineInstr *InsertAt = nullptr;
20. 'InsertAt' initialized to a null pointer value
1044 while (I != E) {
21. Loop condition is false. Execution continues on line 1049
1045 InsertAt = &*I;
1046 ++Size;
1047 ++I;
1048 }
1049 MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt,
22. Called C++ object pointer is null
1050 InsertAt->getDebugLoc(),
1051 TII->get(ARM::MVE_VPST));
1052 MIB.addImm(getARMVPTBlockMask(Size));
1053 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
1054 LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
1055 LoLoop.ToRemove.insert(Block.getVPST());
1056 }
1057 } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
1058 // A vpt block which is only predicated upon vctp and has no internal vpr
1059 // defs:
1060 // - Remove vpst.
1061 // - Unpredicate the remaining instructions.
1062 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
1063 LoLoop.ToRemove.insert(Block.getVPST());
1064 for (auto &PredMI : Insts)
1065 RemovePredicate(PredMI.MI);
1066 }
1067 }
1068 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP);
1069 LoLoop.ToRemove.insert(LoLoop.VCTP);
1070}
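The reported defect follows directly from the reverse walk at lines 1041-1048: when the two reverse iterators coincide on entry (for example, the divergent vpr def is already the last instruction collected for the block), the while body never executes, InsertAt keeps its nullptr initialiser, and line 1049 dereferences it (Size would also still be 0, which getARMVPTBlockMask rejects with llvm_unreachable). Below is a minimal standalone sketch of that failure mode and one possible guard; the types and the bail-out chosen here are illustrative assumptions, not the upstream fix.

#include <cstdio>
#include <list>

struct Instr { const char *Name; };

int main() {
  std::list<Instr> Block = {{"vpst"}, {"vctp"}};
  // Both reverse iterators name the same position, so the range is empty.
  auto I = Block.rbegin();
  auto E = Block.rbegin();

  Instr *InsertAt = nullptr;
  unsigned Size = 0;
  while (I != E) { // condition is false on the first check
    InsertAt = &*I;
    ++Size;
    ++I;
  }

  // Guard before using InsertAt; dereferencing it unconditionally here is
  // exactly the pattern the analyzer warns about.
  if (!InsertAt) {
    std::puts("empty range: nothing to insert a VPST before");
    return 0;
  }
  std::printf("insert VPST before %s covering %u instruction(s)\n",
              InsertAt->Name, Size);
  return 0;
}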
1071
1072void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1073
1074 // Combine the LoopDec and LoopEnd instructions into LE(TP).
1075 auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {
1076 MachineInstr *End = LoLoop.End;
1077 MachineBasicBlock *MBB = End->getParent();
1078 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1079 ARM::MVE_LETP : ARM::t2LEUpdate;
1080 MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
1081 TII->get(Opc));
1082 MIB.addDef(ARM::LR);
1083 MIB.add(End->getOperand(0));
1084 MIB.add(End->getOperand(1));
1085 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
1086 LoLoop.ToRemove.insert(LoLoop.Dec);
1087 LoLoop.ToRemove.insert(End);
1088 return &*MIB;
1089 };
1090
1091 // TODO: We should be able to automatically remove these branches before we
1092 // get here - probably by teaching analyzeBranch about the pseudo
1093 // instructions.
1094 // If there is an unconditional branch, after I, that just branches to the
1095 // next block, remove it.
1096 auto RemoveDeadBranch = [](MachineInstr *I) {
1097 MachineBasicBlock *BB = I->getParent();
1098 MachineInstr *Terminator = &BB->instr_back();
1099 if (Terminator->isUnconditionalBranch() && I != Terminator) {
1100 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
1101 if (BB->isLayoutSuccessor(Succ)) {
1102 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
1103 Terminator->eraseFromParent();
1104 }
1105 }
1106 };
1107
1108 if (LoLoop.Revert) {
1109 if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart)
1110 RevertWhile(LoLoop.Start);
1111 else
1112 LoLoop.Start->eraseFromParent();
1113 bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec);
1114 RevertLoopEnd(LoLoop.End, FlagsAlreadySet);
1115 } else {
1116 LoLoop.Start = ExpandLoopStart(LoLoop);
1117 RemoveDeadBranch(LoLoop.Start);
1118 LoLoop.End = ExpandLoopEnd(LoLoop);
1119 RemoveDeadBranch(LoLoop.End);
1120 if (LoLoop.IsTailPredicationLegal())
1121 ConvertVPTBlocks(LoLoop);
1122 for (auto *I : LoLoop.ToRemove) {
1123 LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
1124 I->eraseFromParent();
1125 }
1126 }
1127
1128 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1129 DFS.ProcessLoop();
1130 const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
1131 for (auto *MBB : PostOrder) {
1132 recomputeLiveIns(*MBB);
1133 // FIXME: For some reason, the live-in print order is non-deterministic for
1134 // our tests and I can't work out why... So just sort them.
1135 MBB->sortUniqueLiveIns();
1136 }
1137
1138 for (auto *MBB : reverse(PostOrder))
1139 recomputeLivenessFlags(*MBB);
1140
1141 // We've moved, removed and inserted new instructions, so update RDA.
1142 RDA->reset();
1143}
1144
1145bool ARMLowOverheadLoops::RevertNonLoops() {
1146 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
1147 bool Changed = false;
1148
1149 for (auto &MBB : *MF) {
1150 SmallVector<MachineInstr*, 4> Starts;
1151 SmallVector<MachineInstr*, 4> Decs;
1152 SmallVector<MachineInstr*, 4> Ends;
1153
1154 for (auto &I : MBB) {
1155 if (isLoopStart(I))
1156 Starts.push_back(&I);
1157 else if (I.getOpcode() == ARM::t2LoopDec)
1158 Decs.push_back(&I);
1159 else if (I.getOpcode() == ARM::t2LoopEnd)
1160 Ends.push_back(&I);
1161 }
1162
1163 if (Starts.empty() && Decs.empty() && Ends.empty())
1164 continue;
1165
1166 Changed = true;
1167
1168 for (auto *Start : Starts) {
1169 if (Start->getOpcode() == ARM::t2WhileLoopStart)
1170 RevertWhile(Start);
1171 else
1172 Start->eraseFromParent();
1173 }
1174 for (auto *Dec : Decs)
1175 RevertLoopDec(Dec);
1176
1177 for (auto *End : Ends)
1178 RevertLoopEnd(End);
1179 }
1180 return Changed;
1181}
1182
1183FunctionPass *llvm::createARMLowOverheadLoopsPass() {
1184 return new ARMLowOverheadLoops();
1185}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/ARM/ARMBaseInstrInfo.h

1//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
14#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
15
16#include "MCTargetDesc/ARMBaseInfo.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/CodeGen/MachineBasicBlock.h"
20#include "llvm/CodeGen/MachineInstr.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineOperand.h"
23#include "llvm/CodeGen/TargetInstrInfo.h"
24#include <array>
25#include <cstdint>
26
27#define GET_INSTRINFO_HEADER
28#include "ARMGenInstrInfo.inc"
29
30namespace llvm {
31
32class ARMBaseRegisterInfo;
33class ARMSubtarget;
34
35class ARMBaseInstrInfo : public ARMGenInstrInfo {
36 const ARMSubtarget &Subtarget;
37
38protected:
39 // Can be only subclassed.
40 explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
41
42 void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
43 unsigned LoadImmOpc, unsigned LoadOpc) const;
44
45 /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
46 /// and \p DefIdx.
47 /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
48 /// the list is modeled as <Reg:SubReg, SubIdx>.
49 /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce
50 /// two elements:
51 /// - %1:sub1, sub0
52 /// - %2<:0>, sub1
53 ///
54 /// \returns true if it is possible to build such an input sequence
55 /// with the pair \p MI, \p DefIdx. False otherwise.
56 ///
57 /// \pre MI.isRegSequenceLike().
58 bool getRegSequenceLikeInputs(
59 const MachineInstr &MI, unsigned DefIdx,
60 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override;
61
62 /// Build the equivalent inputs of an EXTRACT_SUBREG for the given \p MI
63 /// and \p DefIdx.
64 /// \p [out] InputReg of the equivalent EXTRACT_SUBREG.
65 /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce:
66 /// - %1:sub1, sub0
67 ///
68 /// \returns true if it is possible to build such an input sequence
69 /// with the pair \p MI, \p DefIdx. False otherwise.
70 ///
71 /// \pre MI.isExtractSubregLike().
72 bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
73 RegSubRegPairAndIdx &InputReg) const override;
74
75 /// Build the equivalent inputs of an INSERT_SUBREG for the given \p MI
76 /// and \p DefIdx.
77 /// \p [out] BaseReg and \p [out] InsertedReg contain
78 /// the equivalent inputs of INSERT_SUBREG.
79 /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce:
80 /// - BaseReg: %0:sub0
81 /// - InsertedReg: %1:sub1, sub3
82 ///
83 /// \returns true if it is possible to build such an input sequence
84 /// with the pair \p MI, \p DefIdx. False otherwise.
85 ///
86 /// \pre MI.isInsertSubregLike().
87 bool
88 getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
89 RegSubRegPair &BaseReg,
90 RegSubRegPairAndIdx &InsertedReg) const override;
91
92 /// Commutes the operands in the given instruction.
93 /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
94 ///
95 /// Do not call this method for a non-commutable instruction or for
96 /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
97 /// Even though the instruction is commutable, the method may still
98 /// fail to commute the operands; a null pointer is returned in such cases.
99 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
100 unsigned OpIdx1,
101 unsigned OpIdx2) const override;
102 /// If the specific machine instruction is an instruction that moves/copies
103 /// value from one register to another register return destination and source
104 /// registers as machine operands.
105 Optional<DestSourcePair>
106 isCopyInstrImpl(const MachineInstr &MI) const override;
107
108 /// Specialization of \ref TargetInstrInfo::describeLoadedValue, used to
109 /// enhance debug entry value descriptions for ARM targets.
110 Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
111 Register Reg) const override;
112
113public:
114 // Return whether the target has an explicit NOP encoding.
115 bool hasNOP() const;
116
117 // Return the non-pre/post incrementing version of 'Opc'. Return 0
118 // if there is no such opcode.
119 virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
120
121 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
122 MachineInstr &MI,
123 LiveVariables *LV) const override;
124
125 virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
126 const ARMSubtarget &getSubtarget() const { return Subtarget; }
127
128 ScheduleHazardRecognizer *
129 CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
130 const ScheduleDAG *DAG) const override;
131
132 ScheduleHazardRecognizer *
133 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
134 const ScheduleDAG *DAG) const override;
135
136 // Branch analysis.
137 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
138 MachineBasicBlock *&FBB,
139 SmallVectorImpl<MachineOperand> &Cond,
140 bool AllowModify = false) const override;
141 unsigned removeBranch(MachineBasicBlock &MBB,
142 int *BytesRemoved = nullptr) const override;
143 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
144 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
145 const DebugLoc &DL,
146 int *BytesAdded = nullptr) const override;
147
148 bool
149 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
150
151 // Predication support.
152 bool isPredicated(const MachineInstr &MI) const override;
153
154 // MIR printer helper function to annotate Operands with a comment.
155 std::string createMIROperandComment(const MachineInstr &MI,
156 const MachineOperand &Op,
157 unsigned OpIdx) const override;
158
159 ARMCC::CondCodes getPredicate(const MachineInstr &MI) const {
160 int PIdx = MI.findFirstPredOperandIdx();
161 return PIdx != -1 ? (ARMCC::CondCodes)MI.getOperand(PIdx).getImm()
162 : ARMCC::AL;
163 }
164
165 bool PredicateInstruction(MachineInstr &MI,
166 ArrayRef<MachineOperand> Pred) const override;
167
168 bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
169 ArrayRef<MachineOperand> Pred2) const override;
170
171 bool DefinesPredicate(MachineInstr &MI,
172 std::vector<MachineOperand> &Pred) const override;
173
174 bool isPredicable(const MachineInstr &MI) const override;
175
176 // CPSR defined in instruction
177 static bool isCPSRDefined(const MachineInstr &MI);
178 bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
179 bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;
180
181 // Load, scaled register offset
182 bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
183 // Load, scaled register offset, not plus LSL2
184 bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
185 // Minus reg for ldstso addr mode
186 bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
187 // Scaled register offset in address mode 2
188 bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
189 // Load multiple, base reg in list
190 bool isLDMBaseRegInList(const MachineInstr &MI) const;
191 // get LDM variable defs size
192 unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;
193
194 /// GetInstSize - Returns the size of the specified MachineInstr.
195 ///
196 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
197
198 unsigned isLoadFromStackSlot(const MachineInstr &MI,
199 int &FrameIndex) const override;
200 unsigned isStoreToStackSlot(const MachineInstr &MI,
201 int &FrameIndex) const override;
202 unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
203 int &FrameIndex) const override;
204 unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
205 int &FrameIndex) const override;
206
207 void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
208 unsigned SrcReg, bool KillSrc,
209 const ARMSubtarget &Subtarget) const;
210 void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
211 unsigned DestReg, bool KillSrc,
212 const ARMSubtarget &Subtarget) const;
213
214 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
215 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
216 bool KillSrc) const override;
217
218 void storeRegToStackSlot(MachineBasicBlock &MBB,
219 MachineBasicBlock::iterator MBBI,
220 Register SrcReg, bool isKill, int FrameIndex,
221 const TargetRegisterClass *RC,
222 const TargetRegisterInfo *TRI) const override;
223
224 void loadRegFromStackSlot(MachineBasicBlock &MBB,
225 MachineBasicBlock::iterator MBBI,
226 Register DestReg, int FrameIndex,
227 const TargetRegisterClass *RC,
228 const TargetRegisterInfo *TRI) const override;
229
230 bool expandPostRAPseudo(MachineInstr &MI) const override;
231
232 bool shouldSink(const MachineInstr &MI) const override;
233
234 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
235 unsigned DestReg, unsigned SubIdx,
236 const MachineInstr &Orig,
237 const TargetRegisterInfo &TRI) const override;
238
239 MachineInstr &
240 duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
241 const MachineInstr &Orig) const override;
242
243 const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
244 unsigned SubIdx, unsigned State,
245 const TargetRegisterInfo *TRI) const;
246
247 bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1,
248 const MachineRegisterInfo *MRI) const override;
249
250 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
251 /// determine if two loads are loading from the same base address. It should
252 /// only return true if the base pointers are the same and the only
253 /// differences between the two addresses is the offset. It also returns the
254 /// offsets by reference.
255 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1,
256 int64_t &Offset2) const override;
257
258 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
259 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
260 /// should be scheduled together. On some targets if two loads are loading from
261 /// addresses in the same cache line, it's better if they are scheduled
262 /// together. This function takes two integers that represent the load offsets
263 /// from the common base address. It returns true if it decides it's desirable
264 /// to schedule the two loads together. "NumLoads" is the number of loads that
265 /// have already been scheduled after Load1.
266 bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
267 int64_t Offset1, int64_t Offset2,
268 unsigned NumLoads) const override;
269
270 bool isSchedulingBoundary(const MachineInstr &MI,
271 const MachineBasicBlock *MBB,
272 const MachineFunction &MF) const override;
273
274 bool isProfitableToIfCvt(MachineBasicBlock &MBB,
275 unsigned NumCycles, unsigned ExtraPredCycles,
276 BranchProbability Probability) const override;
277
278 bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
279 unsigned ExtraT, MachineBasicBlock &FMBB,
280 unsigned NumF, unsigned ExtraF,
281 BranchProbability Probability) const override;
282
283 bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
284 BranchProbability Probability) const override {
285 return NumCycles == 1;
286 }
287
288 unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
289 unsigned NumInsts) const override;
290 unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override;
291
292 bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
293 MachineBasicBlock &FMBB) const override;
294
295 /// analyzeCompare - For a comparison instruction, return the source registers
296 /// in SrcReg and SrcReg2 if having two register operands, and the value it
297 /// compares against in CmpValue. Return true if the comparison instruction
298 /// can be analyzed.
299 bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
300 unsigned &SrcReg2, int &CmpMask,
301 int &CmpValue) const override;
302
303 /// optimizeCompareInstr - Convert the instruction to set the zero flag so
304 /// that we can remove a "comparison with zero"; Remove a redundant CMP
305 /// instruction if the flags can be updated in the same way by an earlier
306 /// instruction such as SUB.
307 bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
308 unsigned SrcReg2, int CmpMask, int CmpValue,
309 const MachineRegisterInfo *MRI) const override;
310
311 bool analyzeSelect(const MachineInstr &MI,
312 SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp,
313 unsigned &FalseOp, bool &Optimizable) const override;
314
315 MachineInstr *optimizeSelect(MachineInstr &MI,
316 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
317 bool) const override;
318
319 /// FoldImmediate - 'Reg' is known to be defined by a move immediate
320 /// instruction, try to fold the immediate into the use instruction.
321 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
322 MachineRegisterInfo *MRI) const override;
323
324 unsigned getNumMicroOps(const InstrItineraryData *ItinData,
325 const MachineInstr &MI) const override;
326
327 int getOperandLatency(const InstrItineraryData *ItinData,
328 const MachineInstr &DefMI, unsigned DefIdx,
329 const MachineInstr &UseMI,
330 unsigned UseIdx) const override;
331 int getOperandLatency(const InstrItineraryData *ItinData,
332 SDNode *DefNode, unsigned DefIdx,
333 SDNode *UseNode, unsigned UseIdx) const override;
334
335 /// VFP/NEON execution domains.
336 std::pair<uint16_t, uint16_t>
337 getExecutionDomain(const MachineInstr &MI) const override;
338 void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override;
339
340 unsigned
341 getPartialRegUpdateClearance(const MachineInstr &, unsigned,
342 const TargetRegisterInfo *) const override;
343 void breakPartialRegDependency(MachineInstr &, unsigned,
344 const TargetRegisterInfo *TRI) const override;
345
346 /// Get the number of addresses by LDM or VLDM or zero for unknown.
347 unsigned getNumLDMAddresses(const MachineInstr &MI) const;
348
349 std::pair<unsigned, unsigned>
350 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
351 ArrayRef<std::pair<unsigned, const char *>>
352 getSerializableDirectMachineOperandTargetFlags() const override;
353 ArrayRef<std::pair<unsigned, const char *>>
354 getSerializableBitmaskMachineOperandTargetFlags() const override;
355
356private:
357 unsigned getInstBundleLength(const MachineInstr &MI) const;
358
359 int getVLDMDefCycle(const InstrItineraryData *ItinData,
360 const MCInstrDesc &DefMCID,
361 unsigned DefClass,
362 unsigned DefIdx, unsigned DefAlign) const;
363 int getLDMDefCycle(const InstrItineraryData *ItinData,
364 const MCInstrDesc &DefMCID,
365 unsigned DefClass,
366 unsigned DefIdx, unsigned DefAlign) const;
367 int getVSTMUseCycle(const InstrItineraryData *ItinData,
368 const MCInstrDesc &UseMCID,
369 unsigned UseClass,
370 unsigned UseIdx, unsigned UseAlign) const;
371 int getSTMUseCycle(const InstrItineraryData *ItinData,
372 const MCInstrDesc &UseMCID,
373 unsigned UseClass,
374 unsigned UseIdx, unsigned UseAlign) const;
375 int getOperandLatency(const InstrItineraryData *ItinData,
376 const MCInstrDesc &DefMCID,
377 unsigned DefIdx, unsigned DefAlign,
378 const MCInstrDesc &UseMCID,
379 unsigned UseIdx, unsigned UseAlign) const;
380
381 int getOperandLatencyImpl(const InstrItineraryData *ItinData,
382 const MachineInstr &DefMI, unsigned DefIdx,
383 const MCInstrDesc &DefMCID, unsigned DefAdj,
384 const MachineOperand &DefMO, unsigned Reg,
385 const MachineInstr &UseMI, unsigned UseIdx,
386 const MCInstrDesc &UseMCID, unsigned UseAdj) const;
387
388 unsigned getPredicationCost(const MachineInstr &MI) const override;
389
390 unsigned getInstrLatency(const InstrItineraryData *ItinData,
391 const MachineInstr &MI,
392 unsigned *PredCost = nullptr) const override;
393
394 int getInstrLatency(const InstrItineraryData *ItinData,
395 SDNode *Node) const override;
396
397 bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
398 const MachineRegisterInfo *MRI,
399 const MachineInstr &DefMI, unsigned DefIdx,
400 const MachineInstr &UseMI,
401 unsigned UseIdx) const override;
402 bool hasLowDefLatency(const TargetSchedModel &SchedModel,
403 const MachineInstr &DefMI,
404 unsigned DefIdx) const override;
405
406 /// verifyInstruction - Perform target specific instruction verification.
407 bool verifyInstruction(const MachineInstr &MI,
408 StringRef &ErrInfo) const override;
409
410 virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI) const = 0;
411
412 void expandMEMCPY(MachineBasicBlock::iterator) const;
413
414 /// Identify instructions that can be folded into a MOVCC instruction, and
415 /// return the defining instruction.
416 MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
417 const TargetInstrInfo *TII) const;
418
419private:
420 /// Modeling special VFP / NEON fp MLA / MLS hazards.
421
422 /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
423 /// MLx table.
424 DenseMap<unsigned, unsigned> MLxEntryMap;
425
426 /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
427 /// stalls when scheduled together with fp MLA / MLS opcodes.
428 SmallSet<unsigned, 16> MLxHazardOpcodes;
429
430public:
431 /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
432 /// instruction.
433 bool isFpMLxInstruction(unsigned Opcode) const {
434 return MLxEntryMap.count(Opcode);
435 }
436
437 /// isFpMLxInstruction - This version also returns the multiply opcode and the
438 /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
439 /// the MLX instructions with an extra lane operand.
440 bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
441 unsigned &AddSubOpc, bool &NegAcc,
442 bool &HasLane) const;
443
444 /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
445 /// will cause stalls when scheduled after (within 4-cycle window) a fp
446 /// MLA / MLS instruction.
447 bool canCauseFpMLxStall(unsigned Opcode) const {
448 return MLxHazardOpcodes.count(Opcode);
449 }
450
451 /// Returns true if the instruction has a shift by immediate that can be
452 /// executed in one cycle less.
453 bool isSwiftFastImmShift(const MachineInstr *MI) const;
454
455 /// Returns predicate register associated with the given frame instruction.
456 unsigned getFramePred(const MachineInstr &MI) const {
457 assert(isFrameInstr(MI));
458 // Operands of ADJCALLSTACKDOWN/ADJCALLSTACKUP:
459 // - argument declared in the pattern:
460 // 0 - frame size
461 // 1 - arg of CALLSEQ_START/CALLSEQ_END
462 // 2 - predicate code (like ARMCC::AL)
463 // - added by predOps:
464 // 3 - predicate reg
465 return MI.getOperand(3).getReg();
466 }
467
468 Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
469 Register Reg) const override;
470};
471
472/// Get the operands corresponding to the given \p Pred value. By default, the
473/// predicate register is assumed to be 0 (no register), but you can pass in a
474/// \p PredReg if that is not the case.
475static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred,
476 unsigned PredReg = 0) {
477 return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
478 MachineOperand::CreateReg(PredReg, false)}};
479}
480
481/// Get the operand corresponding to the conditional code result. By default,
482/// this is 0 (no register).
483static inline MachineOperand condCodeOp(unsigned CCReg = 0) {
484 return MachineOperand::CreateReg(CCReg, false);
485}
486
487/// Get the operand corresponding to the conditional code result for Thumb1.
488/// This operand will always refer to CPSR and it will have the Define flag set.
489/// You can optionally set the Dead flag by means of \p isDead.
490static inline MachineOperand t1CondCodeOp(bool isDead = false) {
491 return MachineOperand::CreateReg(ARM::CPSR,
492 /*Define*/ true, /*Implicit*/ false,
493 /*Kill*/ false, isDead);
494}
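A hedged usage fragment for the three helpers above (not taken from this file; the surrounding MBB, InsertPt, DL, TII, DestReg and SrcReg are assumed to exist in an ARM backend pass such as the one shown earlier, and ARM::t2ADDri is only an example of a predicable opcode):

// Build a predicable Thumb2 add, append the default AL predicate with no
// predicate register, and leave the optional CPSR def unset (no 'S' bit).
MachineInstrBuilder MIB =
    BuildMI(MBB, InsertPt, DL, TII->get(ARM::t2ADDri), DestReg)
        .addReg(SrcReg)
        .addImm(4)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
// A Thumb1 flag-setting instruction would instead take t1CondCodeOp(), which
// always defines CPSR and can optionally be marked dead.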
495
496static inline
497bool isUncondBranchOpcode(int Opc) {
498 return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
499}
500
501// This table shows the VPT instruction variants, i.e. the different
502// mask field encodings; see also B5.6 Predication/conditional execution in
503// the ArmARM.
504
505
506inline static unsigned getARMVPTBlockMask(unsigned NumInsts) {
507 switch (NumInsts) {
508 case 1:
509 return ARMVCC::T;
510 case 2:
511 return ARMVCC::TT;
512 case 3:
513 return ARMVCC::TTT;
514 case 4:
515 return ARMVCC::TTTT;
516 default:
517 break;
518 };
519 llvm_unreachable("Unexpected number of instruction in a VPT block");
520}
521
522
523static inline bool isVPTOpcode(int Opc) {
524 return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 ||
525 Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 ||
526 Opc == ARM::MVE_VPTv8u16 || Opc == ARM::MVE_VPTv8s16 ||
527 Opc == ARM::MVE_VPTv4i32 || Opc == ARM::MVE_VPTv4u32 ||
528 Opc == ARM::MVE_VPTv4s32 || Opc == ARM::MVE_VPTv4f32 ||
529 Opc == ARM::MVE_VPTv8f16 || Opc == ARM::MVE_VPTv16i8r ||
530 Opc == ARM::MVE_VPTv16u8r || Opc == ARM::MVE_VPTv16s8r ||
531 Opc == ARM::MVE_VPTv8i16r || Opc == ARM::MVE_VPTv8u16r ||
532 Opc == ARM::MVE_VPTv8s16r || Opc == ARM::MVE_VPTv4i32r ||
533 Opc == ARM::MVE_VPTv4u32r || Opc == ARM::MVE_VPTv4s32r ||
534 Opc == ARM::MVE_VPTv4f32r || Opc == ARM::MVE_VPTv8f16r ||
535 Opc == ARM::MVE_VPST;
536}
537
538static inline
539unsigned VCMPOpcodeToVPT(unsigned Opcode) {
540 switch (Opcode) {
541 default:
542 return 0;
543 case ARM::MVE_VCMPf32:
544 return ARM::MVE_VPTv4f32;
545 case ARM::MVE_VCMPf16:
546 return ARM::MVE_VPTv8f16;
547 case ARM::MVE_VCMPi8:
548 return ARM::MVE_VPTv16i8;
549 case ARM::MVE_VCMPi16:
550 return ARM::MVE_VPTv8i16;
551 case ARM::MVE_VCMPi32:
552 return ARM::MVE_VPTv4i32;
553 case ARM::MVE_VCMPu8:
554 return ARM::MVE_VPTv16u8;
555 case ARM::MVE_VCMPu16:
556 return ARM::MVE_VPTv8u16;
557 case ARM::MVE_VCMPu32:
558 return ARM::MVE_VPTv4u32;
559 case ARM::MVE_VCMPs8:
560 return ARM::MVE_VPTv16s8;
561 case ARM::MVE_VCMPs16:
562 return ARM::MVE_VPTv8s16;
563 case ARM::MVE_VCMPs32:
564 return ARM::MVE_VPTv4s32;
565
566 case ARM::MVE_VCMPf32r:
567 return ARM::MVE_VPTv4f32r;
568 case ARM::MVE_VCMPf16r:
569 return ARM::MVE_VPTv8f16r;
570 case ARM::MVE_VCMPi8r:
571 return ARM::MVE_VPTv16i8r;
572 case ARM::MVE_VCMPi16r:
573 return ARM::MVE_VPTv8i16r;
574 case ARM::MVE_VCMPi32r:
575 return ARM::MVE_VPTv4i32r;
576 case ARM::MVE_VCMPu8r:
577 return ARM::MVE_VPTv16u8r;
578 case ARM::MVE_VCMPu16r:
579 return ARM::MVE_VPTv8u16r;
580 case ARM::MVE_VCMPu32r:
581 return ARM::MVE_VPTv4u32r;
582 case ARM::MVE_VCMPs8r:
583 return ARM::MVE_VPTv16s8r;
584 case ARM::MVE_VCMPs16r:
585 return ARM::MVE_VPTv8s16r;
586 case ARM::MVE_VCMPs32r:
587 return ARM::MVE_VPTv4s32r;
588 }
589}
590
591static inline
592unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
593 switch (Opcode) {
594 default:
595 llvm_unreachable("unhandled vctp opcode");
596 break;
597 case ARM::MVE_VCTP8:
598 return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
599 case ARM::MVE_VCTP16:
600 return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
601 case ARM::MVE_VCTP32:
602 return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
603 case ARM::MVE_VCTP64:
604 return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
605 }
606 return 0;
607}
608
609static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
610 switch (Opcode) {
611 default:
612 llvm_unreachable("unhandled vctp opcode");
613 case ARM::MVE_VCTP8: return 16;
614 case ARM::MVE_VCTP16: return 8;
615 case ARM::MVE_VCTP32: return 4;
616 case ARM::MVE_VCTP64: return 2;
617 }
618 return 0;
619}
620
621static inline
622bool isVCTP(MachineInstr *MI) {
623 switch (MI->getOpcode()) {
8. Control jumps to the 'default' case at line 624
624 default:
625 break;
9. Execution continues on line 632
626 case ARM::MVE_VCTP8:
627 case ARM::MVE_VCTP16:
628 case ARM::MVE_VCTP32:
629 case ARM::MVE_VCTP64:
630 return true;
631 }
632 return false;
10. Returning zero, which participates in a condition later
633}
634
635static inline
636bool isLoopStart(MachineInstr &MI) {
637 return MI.getOpcode() == ARM::t2DoLoopStart ||
638 MI.getOpcode() == ARM::t2WhileLoopStart;
639}
640
641static inline
642bool isCondBranchOpcode(int Opc) {
643 return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
644}
645
646static inline bool isJumpTableBranchOpcode(int Opc) {
647 return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm_i12 ||
648 Opc == ARM::BR_JTm_rs || Opc == ARM::BR_JTadd || Opc == ARM::tBR_JTr ||
649 Opc == ARM::t2BR_JT;
650}
651
652static inline
653bool isIndirectBranchOpcode(int Opc) {
654 return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
655}
656
657static inline bool isPopOpcode(int Opc) {
658 return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
659 Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
660 Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD;
661}
662
663static inline bool isPushOpcode(int Opc) {
664 return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD ||
665 Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
666}
667
668static inline bool isSubImmOpcode(int Opc) {
669 return Opc == ARM::SUBri ||
670 Opc == ARM::tSUBi3 || Opc == ARM::tSUBi8 ||
671 Opc == ARM::tSUBSi3 || Opc == ARM::tSUBSi8 ||
672 Opc == ARM::t2SUBri || Opc == ARM::t2SUBri12 || Opc == ARM::t2SUBSri;
673}
674
675static inline bool isMovRegOpcode(int Opc) {
676 return Opc == ARM::MOVr || Opc == ARM::tMOVr || Opc == ARM::t2MOVr;
677}
678
679/// isValidCoprocessorNumber - decide whether an explicit coprocessor
680/// number is legal in generic instructions like CDP. The answer can
681/// vary with the subtarget.
682static inline bool isValidCoprocessorNumber(unsigned Num,
683 const FeatureBitset& featureBits) {
684 // Armv8-A disallows everything *other* than 111x (CP14 and CP15).
685 if (featureBits[ARM::HasV8Ops] && (Num & 0xE) != 0xE)
686 return false;
687
688 // Armv7 disallows 101x (CP10 and CP11), which clash with VFP/NEON.
689 if (featureBits[ARM::HasV7Ops] && (Num & 0xE) == 0xA)
690 return false;
691
692 // Armv8.1-M also disallows 100x (CP8,CP9) and 111x (CP14,CP15)
693 // which clash with MVE.
694 if (featureBits[ARM::HasV8_1MMainlineOps] &&
695 ((Num & 0xE) == 0x8 || (Num & 0xE) == 0xE))
696 return false;
697
698 return true;
699}
700
701/// getInstrPredicate - If instruction is predicated, returns its predicate
702/// condition, otherwise returns AL. It also returns the condition code
703/// register by reference.
704ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
705
706unsigned getMatchingCondBranchOpcode(unsigned Opc);
707
708/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
709/// the instruction is encoded with an 'S' bit is determined by the optional
710/// CPSR def operand.
711unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
712
713/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
714/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
715/// code.
716void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
717 MachineBasicBlock::iterator &MBBI,
718 const DebugLoc &dl, unsigned DestReg,
719 unsigned BaseReg, int NumBytes,
720 ARMCC::CondCodes Pred, unsigned PredReg,
721 const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
722
723void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
724 MachineBasicBlock::iterator &MBBI,
725 const DebugLoc &dl, unsigned DestReg,
726 unsigned BaseReg, int NumBytes,
727 ARMCC::CondCodes Pred, unsigned PredReg,
728 const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
729void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
730 MachineBasicBlock::iterator &MBBI,
731 const DebugLoc &dl, unsigned DestReg,
732 unsigned BaseReg, int NumBytes,
733 const TargetInstrInfo &TII,
734 const ARMBaseRegisterInfo &MRI,
735 unsigned MIFlags = 0);
736
737/// Tries to add registers to the reglist of a given base-updating
738/// push/pop instruction to adjust the stack by an additional
739/// NumBytes. This can save a few bytes per function in code-size, but
740/// obviously generates more memory traffic. As such, it only takes
741/// effect in functions being optimised for size.
742bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
743 MachineFunction &MF, MachineInstr *MI,
744 unsigned NumBytes);
745
746/// rewriteARMFrameIndex / rewriteT2FrameIndex -
747/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
748/// offset could not be handled directly in MI, and return the left-over
749/// portion by reference.
750bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
751 unsigned FrameReg, int &Offset,
752 const ARMBaseInstrInfo &TII);
753
754bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
755 unsigned FrameReg, int &Offset,
756 const ARMBaseInstrInfo &TII,
757 const TargetRegisterInfo *TRI);
758
759/// Return true if Reg is defined between From and To.
760bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From,
761 MachineBasicBlock::iterator To,
762 const TargetRegisterInfo *TRI);
763
764/// Search backwards from a tBcc to find a tCMPi8 against 0, meaning
765/// we can convert them to a tCBZ or tCBNZ. Return nullptr if not found.
766MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br,
767 const TargetRegisterInfo *TRI);
768
769void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB);
770void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg);
771
772void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
773void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
774 unsigned Inactive);
775
776/// Returns the number of instructions required to materialize the given
777/// constant in a register, or 3 if a literal pool load is needed.
778/// If ForCodesize is specified, an approximate cost in bytes is returned.
779unsigned ConstantMaterializationCost(unsigned Val,
780 const ARMSubtarget *Subtarget,
781 bool ForCodesize = false);
782
783/// Returns true if Val1 has a lower Constant Materialization Cost than Val2.
784/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as
785/// specified. If the scores are equal, return the comparison for !ForCodesize.
786bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
787 const ARMSubtarget *Subtarget,
788 bool ForCodesize = false);
789
790} // end namespace llvm
791
792#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H