Line data Source code
1 : //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file This file contains a pass that performs load / store related peephole
11 : /// optimizations. This pass should be run after register allocation.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "ARM.h"
16 : #include "ARMBaseInstrInfo.h"
17 : #include "ARMBaseRegisterInfo.h"
18 : #include "ARMISelLowering.h"
19 : #include "ARMMachineFunctionInfo.h"
20 : #include "ARMSubtarget.h"
21 : #include "MCTargetDesc/ARMAddressingModes.h"
22 : #include "MCTargetDesc/ARMBaseInfo.h"
23 : #include "Utils/ARMBaseInfo.h"
24 : #include "llvm/ADT/ArrayRef.h"
25 : #include "llvm/ADT/DenseMap.h"
26 : #include "llvm/ADT/DenseSet.h"
27 : #include "llvm/ADT/STLExtras.h"
28 : #include "llvm/ADT/SmallPtrSet.h"
29 : #include "llvm/ADT/SmallSet.h"
30 : #include "llvm/ADT/SmallVector.h"
31 : #include "llvm/ADT/Statistic.h"
32 : #include "llvm/ADT/iterator_range.h"
33 : #include "llvm/Analysis/AliasAnalysis.h"
34 : #include "llvm/CodeGen/LivePhysRegs.h"
35 : #include "llvm/CodeGen/MachineBasicBlock.h"
36 : #include "llvm/CodeGen/MachineFunction.h"
37 : #include "llvm/CodeGen/MachineFunctionPass.h"
38 : #include "llvm/CodeGen/MachineInstr.h"
39 : #include "llvm/CodeGen/MachineInstrBuilder.h"
40 : #include "llvm/CodeGen/MachineMemOperand.h"
41 : #include "llvm/CodeGen/MachineOperand.h"
42 : #include "llvm/CodeGen/MachineRegisterInfo.h"
43 : #include "llvm/CodeGen/RegisterClassInfo.h"
44 : #include "llvm/CodeGen/TargetFrameLowering.h"
45 : #include "llvm/CodeGen/TargetInstrInfo.h"
46 : #include "llvm/CodeGen/TargetLowering.h"
47 : #include "llvm/CodeGen/TargetRegisterInfo.h"
48 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
49 : #include "llvm/IR/DataLayout.h"
50 : #include "llvm/IR/DebugLoc.h"
51 : #include "llvm/IR/DerivedTypes.h"
52 : #include "llvm/IR/Function.h"
53 : #include "llvm/IR/Type.h"
54 : #include "llvm/MC/MCInstrDesc.h"
55 : #include "llvm/Pass.h"
56 : #include "llvm/Support/Allocator.h"
57 : #include "llvm/Support/CommandLine.h"
58 : #include "llvm/Support/Debug.h"
59 : #include "llvm/Support/ErrorHandling.h"
60 : #include "llvm/Support/raw_ostream.h"
61 : #include <algorithm>
62 : #include <cassert>
63 : #include <cstddef>
64 : #include <cstdlib>
65 : #include <iterator>
66 : #include <limits>
67 : #include <utility>
68 :
69 : using namespace llvm;
70 :
71 : #define DEBUG_TYPE "arm-ldst-opt"
72 :
// Pass-wide statistics (reported with -stats) counting each kind of
// load/store combining this pass performs or undoes.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

// Human-readable pass name, shared by getPassName() and INITIALIZE_PASS.
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
95 :
96 : namespace {
97 :
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;

  // Cached per-function/target state, set up by runOnMachineFunction.
  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;
  // Lazily computed backward liveness; see moveLiveRegsBefore().
  LivePhysRegs LiveRegs;
  RegisterClassInfo RegClassInfo;
  // Current position of the backward liveness scan within a block.
  MachineBasicBlock::const_iterator LiveRegPos;
  bool LiveRegsValid;
  bool RegClassInfoValid;
  // Subtarget mode flags cached for quick dispatch.
  bool isThumb1, isThumb2;

  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  // This pass runs post-RA, so virtual registers must be gone.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }

private:
  /// A set of load/store MachineInstrs with same base register sorted by
  /// offset.
  struct MemOpQueueEntry {
    MachineInstr *MI;
    int Offset;        ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.

    MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
        : MI(&MI), Offset(Offset), Position(Position) {}
  };
  using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;

  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;

    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;

    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;

    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    unsigned InsertPos;

    /// Whether the instructions can be merged into a ldm/stm instruction.
    bool CanMergeToLSMulti;

    /// Whether the instructions can be merged into a ldrd/strd instruction.
    bool CanMergeToLSDouble;
  };
  // Candidates are bump-allocated; they live until the pass finishes a
  // function.
  SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  SmallVector<const MergeCandidate*,4> Candidates;
  SmallVector<MachineInstr*,4> MergeBaseCandidates;

  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                         unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *CreateLoadStoreMulti(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
      int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
      ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
      ArrayRef<std::pair<unsigned, bool>> Regs);
  MachineInstr *CreateLoadStoreDouble(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
      int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
      ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
      ArrayRef<std::pair<unsigned, bool>> Regs) const;
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  bool CombineMovBx(MachineBasicBlock &MBB);
};
194 :
195 : } // end anonymous namespace
196 :
// Pass identification; the address of ID is the unique pass identifier.
char ARMLoadStoreOpt::ID = 0;

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                false)
201 :
202 85 : static bool definesCPSR(const MachineInstr &MI) {
203 589 : for (const auto &MO : MI.operands()) {
204 504 : if (!MO.isReg())
205 : continue;
206 334 : if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
207 : // If the instruction has live CPSR def, then it's not safe to fold it
208 : // into load / store.
209 : return true;
210 : }
211 :
212 : return false;
213 : }
214 :
215 51645 : static int getMemoryOpOffset(const MachineInstr &MI) {
216 51645 : unsigned Opcode = MI.getOpcode();
217 51645 : bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
218 51645 : unsigned NumOperands = MI.getDesc().getNumOperands();
219 51645 : unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
220 :
221 103290 : if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
222 51645 : Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
223 44518 : Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
224 44509 : Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
225 40017 : return OffField;
226 :
227 : // Thumb1 immediate offsets are scaled by 4
228 11628 : if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
229 9012 : Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
230 4073 : return OffField * 4;
231 :
232 7555 : int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
233 7540 : : ARM_AM::getAM5Offset(OffField) * 4;
234 7555 : ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
235 : : ARM_AM::getAM5Op(OffField);
236 :
237 : if (Op == ARM_AM::sub)
238 48 : return -Offset;
239 :
240 : return Offset;
241 : }
242 :
/// Return the base-register operand of single load/store \p MI; it is
/// operand 1 for every opcode this pass handles.
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

/// Return the transferred-register operand (loaded destination or stored
/// source) of single load/store \p MI; it is operand 0 for every opcode this
/// pass handles.
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}
250 :
/// Map a single load/store opcode to the corresponding load/store-multiple
/// opcode for addressing submode \p Mode. Returns 0 when no non-writeback
/// multiple form exists for the requested mode (the VFP db cases); any
/// opcode/mode combination not listed here is a programming error.
/// Also bumps the matching statistics counter.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
335 :
/// Return the addressing submode (ia/ib/da/db) encoded in load/store-multiple
/// opcode \p Opcode. Aborts on any opcode that is not a known LDM/STM form.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
389 :
390 : static bool isT1i32Load(unsigned Opc) {
391 17694 : return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
392 : }
393 :
394 : static bool isT2i32Load(unsigned Opc) {
395 15579 : return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
396 : }
397 :
398 : static bool isi32Load(unsigned Opc) {
399 22663 : return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
400 : }
401 :
402 : static bool isT1i32Store(unsigned Opc) {
403 3498 : return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
404 : }
405 :
406 : static bool isT2i32Store(unsigned Opc) {
407 3019 : return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
408 : }
409 :
410 : static bool isi32Store(unsigned Opc) {
411 5730 : return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
412 : }
413 :
414 : static bool isLoadSingle(unsigned Opc) {
415 6645 : return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
416 : }
417 :
/// Return the factor that converts a word count into the immediate-field
/// units of Thumb1 opcode \p Opc. The encodings scale their immediate by the
/// access size, so one word equals: 1 immediate unit for word accesses,
/// 2 units for halfword accesses, and 4 units for byte accesses. Used by
/// UpdateBaseRegUses as `WordOffset * getImmScale(Opc)`.
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
434 :
/// Return the number of bytes transferred by load/store instruction \p MI,
/// or 0 for opcodes this pass does not recognize. For the multiple forms the
/// size is derived from the number of register-list operands (the variadic
/// operands beyond the fixed ones described in the MCInstrDesc).
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // 4 bytes per register in the list.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    // 8 bytes per double-precision register in the list.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
477 :
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
///
/// Walks forward from \p MBBI rewriting the immediate offsets of
/// instructions that read \p Base (by \p WordOffset words); when an
/// instruction cannot be rewritten, a compensating SUBS is inserted instead
/// and the walk stops. \p Pred / \p PredReg predicate any inserted
/// instruction.
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(*MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
          .add(t1CondCodeOp(true))
          .addReg(Base)
          .addImm(WordOffset * 4)
          .addImm(Pred)
          .addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
        .add(t1CondCodeOp(true))
        .addReg(Base)
        .addImm(WordOffset * 4)
        .addImm(Pred)
        .addReg(PredReg);
  }
}
578 :
579 : /// Return the first register of class \p RegClass that is not in \p Regs.
580 66 : unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
581 66 : if (!RegClassInfoValid) {
582 64 : RegClassInfo.runOnMachineFunction(*MF);
583 64 : RegClassInfoValid = true;
584 : }
585 :
586 318 : for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
587 311 : if (!LiveRegs.contains(Reg))
588 59 : return Reg;
589 : return 0;
590 : }
591 :
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(*TRI);
    LiveRegs.addLiveOuts(MBB);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  // LiveRegPos only ever moves backwards, which is why queries within one
  // block must be made in reverse program order.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
610 :
611 : static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
612 : unsigned Reg) {
613 839 : for (const std::pair<unsigned, bool> &R : Regs)
614 660 : if (R.first == Reg)
615 : return true;
616 : return false;
617 : }
618 :
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction on success, or nullptr if the merge was
/// abandoned (the comment previously said "returns true"; the function in
/// fact returns the instruction pointer).
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi)
      Writeback = false;
    else if (Opcode == ARM::tSTRi)
      return nullptr;
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    // Pick the add/sub opcode matching the subtarget and immediate range.
    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
              .addReg(Base, getKillRegState(KillOldBase))
              .add(predOps(Pred, PredReg));

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Offset / 4)
            .add(predOps(Pred, PredReg));
      } else
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
            .add(t1CondCodeOp(true))
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Offset)
            .add(predOps(Pred, PredReg));
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase))
          .addImm(Offset)
          .add(predOps(Pred, PredReg))
          .add(condCodeOp());
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only inThumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  // Append the register list, marking defs for loads and honoring the kill
  // flags collected by the caller.
  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
820 :
/// Create and insert a Thumb2 LDRD or STRD transferring exactly the two
/// registers in \p Regs at \p Base + \p Offset. Returns the newly created
/// instruction. \p Opcode is the original single load/store opcode, used only
/// to decide between the load and store form.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    // Both transferred registers are defined by the load.
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    // Stored sources keep the kill flags collected by the caller.
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}
843 :
/// Try to merge the instructions of \p Cand into a single LDRD/STRD or
/// LDM/STM. On success the merged instructions are erased, liveness flags in
/// the surrounding instructions are repaired, and the new instruction is
/// returned; on failure nullptr is returned and nothing is changed.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  // NOTE(review): KilledRegs is populated below but never read in this
  // function — looks like dead state; confirm before removing.
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers: after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (is_contained(ImpDefs, DefReg))
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  using iterator = MachineBasicBlock::iterator;

  // The merged instruction is inserted just after the latest of the original
  // instructions, so every value it stores/base it uses is available there.
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(*First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer the LDRD/STRD form when the candidate supports it.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            // An intervening def takes over the live range; drop our copy.
            ImpDefReg = 0;
        }
      }
    }

    // Re-attach the surviving implicit super-reg defs to the merged load.
    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
961 :
/// Return true if \p Offset is encodable in a t2LDRDi8/t2STRDi8: it must be
/// a multiple of 4 whose magnitude fits the 8-bit immediate field, which is
/// internally scaled by 4 (so |Offset| must be below 1024).
static bool isValidLSDoubleOffset(int Offset) {
  if (Offset % 4 != 0)
    return false;
  return Offset > -1024 && Offset < 1024;
}
968 :
/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;

  // Stack pointer alignment is out of the programmers control so we can trust
  // SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlignment() >= 4)
    return true;
  // Any other i32 load/store might be misaligned; be conservative.
  return false;
}
986 :
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// \p MemOps is expected to hold same-opcode memory ops sorted by offset; the
/// function greedily partitions it into chains of consecutive-offset ops and
/// records one MergeCandidate (with LSMulti/LSDouble feasibility flags) per
/// chain in Candidates.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    // Undef transfer registers get a sentinel encoding so the ascending-order
    // check below never accepts a successor.
    unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    // LDRD/STRD is Thumb2-only, integer-only, and needs an encodable offset.
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid forming vldm/vstm that start with an odd register
    // number, as those need more uops than individual vldrs/vstrs.
    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // vldm / vstm limit are 32 for S variants, 16 for D variants.
    unsigned Limit;
    switch (Opcode) {
    default:
      Limit = UINT_MAX;
      break;
    case ARM::VLDRD:
    case ARM::VSTRD:
      Limit = 16;
      break;
    }

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Chains require strictly consecutive memory offsets.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;
      if (Count == Limit)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // LSDouble can only absorb one extra op (two registers total).
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction is never worth merging with itself.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
1103 :
1104 12 : static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1105 : ARM_AM::AMSubMode Mode) {
1106 12 : switch (Opc) {
1107 0 : default: llvm_unreachable("Unhandled opcode!");
1108 2 : case ARM::LDMIA:
1109 : case ARM::LDMDA:
1110 : case ARM::LDMDB:
1111 : case ARM::LDMIB:
1112 : switch (Mode) {
1113 0 : default: llvm_unreachable("Unhandled submode!");
1114 : case ARM_AM::ia: return ARM::LDMIA_UPD;
1115 : case ARM_AM::ib: return ARM::LDMIB_UPD;
1116 : case ARM_AM::da: return ARM::LDMDA_UPD;
1117 : case ARM_AM::db: return ARM::LDMDB_UPD;
1118 : }
1119 1 : case ARM::STMIA:
1120 : case ARM::STMDA:
1121 : case ARM::STMDB:
1122 : case ARM::STMIB:
1123 : switch (Mode) {
1124 0 : default: llvm_unreachable("Unhandled submode!");
1125 : case ARM_AM::ia: return ARM::STMIA_UPD;
1126 : case ARM_AM::ib: return ARM::STMIB_UPD;
1127 : case ARM_AM::da: return ARM::STMDA_UPD;
1128 : case ARM_AM::db: return ARM::STMDB_UPD;
1129 : }
1130 2 : case ARM::t2LDMIA:
1131 : case ARM::t2LDMDB:
1132 2 : switch (Mode) {
1133 0 : default: llvm_unreachable("Unhandled submode!");
1134 : case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1135 0 : case ARM_AM::db: return ARM::t2LDMDB_UPD;
1136 : }
1137 4 : case ARM::t2STMIA:
1138 : case ARM::t2STMDB:
1139 4 : switch (Mode) {
1140 0 : default: llvm_unreachable("Unhandled submode!");
1141 : case ARM_AM::ia: return ARM::t2STMIA_UPD;
1142 0 : case ARM_AM::db: return ARM::t2STMDB_UPD;
1143 : }
1144 0 : case ARM::VLDMSIA:
1145 0 : switch (Mode) {
1146 0 : default: llvm_unreachable("Unhandled submode!");
1147 : case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1148 0 : case ARM_AM::db: return ARM::VLDMSDB_UPD;
1149 : }
1150 0 : case ARM::VLDMDIA:
1151 0 : switch (Mode) {
1152 0 : default: llvm_unreachable("Unhandled submode!");
1153 : case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1154 0 : case ARM_AM::db: return ARM::VLDMDDB_UPD;
1155 : }
1156 0 : case ARM::VSTMSIA:
1157 0 : switch (Mode) {
1158 0 : default: llvm_unreachable("Unhandled submode!");
1159 : case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1160 0 : case ARM_AM::db: return ARM::VSTMSDB_UPD;
1161 : }
1162 3 : case ARM::VSTMDIA:
1163 3 : switch (Mode) {
1164 0 : default: llvm_unreachable("Unhandled submode!");
1165 : case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1166 0 : case ARM_AM::db: return ARM::VSTMDDB_UPD;
1167 : }
1168 : }
1169 : }
1170 :
1171 : /// Check if the given instruction increments or decrements a register and
1172 : /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1173 : /// generated by the instruction are possibly read as well.
1174 6032 : static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1175 : ARMCC::CondCodes Pred, unsigned PredReg) {
1176 : bool CheckCPSRDef;
1177 : int Scale;
1178 12064 : switch (MI.getOpcode()) {
1179 : case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1180 0 : case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1181 38 : case ARM::t2SUBri:
1182 38 : case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1183 271 : case ARM::t2ADDri:
1184 271 : case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1185 11 : case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1186 14 : case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1187 : default: return 0;
1188 : }
1189 :
1190 : unsigned MIPredReg;
1191 499 : if (MI.getOperand(0).getReg() != Reg ||
1192 278 : MI.getOperand(1).getReg() != Reg ||
1193 447 : getInstrPredicate(MI, MIPredReg) != Pred ||
1194 93 : MIPredReg != PredReg)
1195 241 : return 0;
1196 :
1197 93 : if (CheckCPSRDef && definesCPSR(MI))
1198 : return 0;
1199 93 : return MI.getOperand(2).getImm() * Scale;
1200 : }
1201 :
1202 : /// Searches for an increment or decrement of \p Reg before \p MBBI.
1203 : static MachineBasicBlock::iterator
1204 3578 : findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1205 : ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1206 3578 : Offset = 0;
1207 3578 : MachineBasicBlock &MBB = *MBBI->getParent();
1208 : MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1209 3578 : MachineBasicBlock::iterator EndMBBI = MBB.end();
1210 3578 : if (MBBI == BeginMBBI)
1211 1073 : return EndMBBI;
1212 :
1213 : // Skip debug values.
1214 2505 : MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1215 5 : while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1216 : --PrevMBBI;
1217 :
1218 2505 : Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1219 2522 : return Offset == 0 ? EndMBBI : PrevMBBI;
1220 : }
1221 :
1222 : /// Searches for a increment or decrement of \p Reg after \p MBBI.
1223 : static MachineBasicBlock::iterator
1224 3567 : findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1225 : ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1226 3567 : Offset = 0;
1227 3567 : MachineBasicBlock &MBB = *MBBI->getParent();
1228 3567 : MachineBasicBlock::iterator EndMBBI = MBB.end();
1229 3567 : MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1230 : // Skip debug values.
1231 3571 : while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1232 : ++NextMBBI;
1233 3567 : if (NextMBBI == EndMBBI)
1234 40 : return EndMBBI;
1235 :
1236 3527 : Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1237 3603 : return Offset == 0 ? EndMBBI : NextMBBI;
1238 : }
1239 :
/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// Returns true (and erases \p MI plus the merged add/sub) on success.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // First try to fold a preceding decrement by flipping the submode.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    // Otherwise look for a matching increment/decrement after the op.
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {

      // We couldn't find an inc/dec to merge. But if the base is dead, we
      // can still change to a writeback form as that will save us 2 bytes
      // of code size. It can create WAW hazards though, so only do it if
      // we're minimizing code size.
      if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
        return false;

      // The writeback-only encodings used here cannot reach high registers.
      bool HighRegsUsed = false;
      for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
        if (MI->getOperand(i).getReg() >= ARM::R8) {
          HighRegsUsed = true;
          break;
        }

      if (!HighRegsUsed)
        MergeInstr = MBB.end();  // No add/sub to erase below.
      else
        return false;
    }
  }
  if (MergeInstr != MBB.end())
    MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.add(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  MBB.erase(MBBI);
  return true;
}
1325 :
1326 : static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1327 : ARM_AM::AddrOpc Mode) {
1328 0 : switch (Opc) {
1329 : case ARM::LDRi12:
1330 : return ARM::LDR_PRE_IMM;
1331 0 : case ARM::STRi12:
1332 : return ARM::STR_PRE_IMM;
1333 0 : case ARM::VLDRS:
1334 : return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1335 0 : case ARM::VLDRD:
1336 : return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1337 0 : case ARM::VSTRS:
1338 : return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1339 0 : case ARM::VSTRD:
1340 : return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1341 0 : case ARM::t2LDRi8:
1342 : case ARM::t2LDRi12:
1343 : return ARM::t2LDR_PRE;
1344 0 : case ARM::t2STRi8:
1345 : case ARM::t2STRi12:
1346 : return ARM::t2STR_PRE;
1347 0 : default: llvm_unreachable("Unhandled opcode!");
1348 : }
1349 : }
1350 :
1351 : static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1352 : ARM_AM::AddrOpc Mode) {
1353 0 : switch (Opc) {
1354 : case ARM::LDRi12:
1355 : return ARM::LDR_POST_IMM;
1356 0 : case ARM::STRi12:
1357 : return ARM::STR_POST_IMM;
1358 0 : case ARM::VLDRS:
1359 : return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1360 0 : case ARM::VLDRD:
1361 : return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1362 0 : case ARM::VSTRS:
1363 : return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1364 0 : case ARM::VSTRD:
1365 : return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1366 0 : case ARM::t2LDRi8:
1367 : case ARM::t2LDRi12:
1368 : return ARM::t2LDR_POST;
1369 0 : case ARM::t2STRi8:
1370 : case ARM::t2STRi12:
1371 : return ARM::t2STR_POST;
1372 0 : default: llvm_unreachable("Unhandled opcode!");
1373 : }
1374 : }
1375 :
/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible, producing a pre- or
/// post-indexed form. Returns true (and erases \p MI plus the merged add/sub)
/// on success.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  // AM5: VFP loads/stores; AM2: ARM-mode LDR/STR immediate forms.
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset forms can absorb the base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // A preceding add/sub yields a pre-indexed form; a trailing one yields a
  // post-indexed form.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed forms encode direction and amount in an AM2 immediate.
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
            .addReg(Base, RegState::Define)
            .addReg(Base)
            .addReg(0)
            .addImm(Imm)
            .add(predOps(Pred, PredReg));
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base)
          .addImm(Offset)
          .add(predOps(Pred, PredReg));
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base)
          .addReg(0)
          .addImm(Imm)
          .add(predOps(Pred, PredReg));
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
          .addReg(MO.getReg(), getKillRegState(MO.isKill()))
          .addReg(Base)
          .addImm(Offset)
          .add(predOps(Pred, PredReg));
    }
  }
  MBB.erase(MBBI);

  return true;
}
1491 :
/// Fold a preceding/trailing inc/dec of the base register into a
/// t2LDRDi8/t2STRDi8, turning it into the pre- or post-indexed
/// t2LDRD/t2STRD form. Returns true (and erases \p MI plus the merged
/// add/sub) on success.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only the zero-offset form can absorb the base update.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  // A preceding +-8 add/sub becomes a pre-indexed form; a trailing one a
  // post-indexed form (8 bytes == the two 32-bit transfer registers).
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // Operand order differs: loads define the pair first, stores define the
  // written-back base first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.add(MO);
  MIB.setMemRefs(MI.memoperands());

  MBB.erase(MBBI);
  return true;
}
1549 :
1550 : /// Returns true if instruction is a memory operation that this pass is capable
1551 : /// of operating on.
1552 277487 : static bool isMemoryOp(const MachineInstr &MI) {
1553 277487 : unsigned Opcode = MI.getOpcode();
1554 277487 : switch (Opcode) {
1555 : case ARM::VLDRS:
1556 : case ARM::VSTRS:
1557 : case ARM::VLDRD:
1558 : case ARM::VSTRD:
1559 : case ARM::LDRi12:
1560 : case ARM::STRi12:
1561 : case ARM::tLDRi:
1562 : case ARM::tSTRi:
1563 : case ARM::tLDRspi:
1564 : case ARM::tSTRspi:
1565 : case ARM::t2LDRi8:
1566 : case ARM::t2LDRi12:
1567 : case ARM::t2STRi8:
1568 : case ARM::t2STRi12:
1569 : break;
1570 : default:
1571 : return false;
1572 : }
1573 62854 : if (!MI.getOperand(1).isReg())
1574 : return false;
1575 :
1576 : // When no memory operands are present, conservatively assume unaligned,
1577 : // volatile, unfoldable.
1578 24790 : if (!MI.hasOneMemOperand())
1579 : return false;
1580 :
1581 23601 : const MachineMemOperand &MMO = **MI.memoperands_begin();
1582 :
1583 : // Don't touch volatile memory accesses - we may be changing their order.
1584 47202 : if (MMO.isVolatile())
1585 : return false;
1586 :
1587 : // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1588 : // not.
1589 21758 : if (MMO.getAlignment() < 4)
1590 : return false;
1591 :
1592 : // str <undef> could probably be eliminated entirely, but for now we just want
1593 : // to avoid making a mess of it.
1594 : // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1595 42988 : if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1596 : return false;
1597 :
1598 : // Likewise don't mess with references to undefined addresses.
1599 21494 : if (MI.getOperand(1).isUndef())
1600 94 : return false;
1601 :
1602 : return true;
1603 : }
1604 :
/// Emit a single load or store (opcode \p NewOpc) of \p Reg at
/// [\p BaseReg + \p Offset] before \p MBBI, carrying over the operand flags
/// from an original instruction that is being expanded. \p isDef selects
/// between a load (Reg is defined; RegDeadKill means dead) and a store (Reg
/// is read; RegDeadKill means killed).
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, int Offset,
                          bool isDef, unsigned NewOpc, unsigned Reg,
                          bool RegDeadKill, bool RegUndef, unsigned BaseReg,
                          bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
                          unsigned PredReg, const TargetInstrInfo *TII) {
  if (isDef) {
    // Load: Reg is a def, so RegDeadKill is applied as the dead flag.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: Reg is a use, so RegDeadKill is applied as the kill flag.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
1625 :
/// Rewrite an LDRD/STRD whose register pair cannot be used as-is: either the
/// registers are not the even/odd consecutive pair ARM-mode LDRD/STRD
/// requires, or the instruction triggers Cortex-M3 errata 602117 (LDRD with
/// the base register in the transfer list). The instruction is replaced by an
/// LDM/STM when registers ascend with no offset, otherwise by two single
/// loads/stores. Returns true (and repositions \p MBBI at the replacement)
/// when a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  // if we see this opcode.
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  // Operand layout: (even transfer reg, odd transfer reg, base, ...).
  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
      (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
      (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  // For loads the transfer regs may be dead; for stores they may be killed.
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
         "register offset not handled below");
  int OffImm = getMemoryOpOffset(*MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // If this is a load, make sure the first load does not clobber the base
    // register before the second load reads it.
    if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
      // Emit the odd (non-clobbering) load first so the base is still valid
      // when the even load overwrites it.
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
                    false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
                    false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
                    EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
      InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
                    OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  // Remove the original paired instruction; MBBI now points past it so the
  // caller's backward scan revisits the replacement instructions.
  MBBI = MBB.erase(MBBI);
  return true;
}
1735 :
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
///
/// Scans the block backwards building chains of loads/stores that share a
/// base register, opcode and predicate, then hands each chain to
/// FormCandidates() and finally merges the collected candidates.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  // State of the chain currently being built (0 / ~0u / AL = no chain open).
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Walk the block from the last instruction to the first; MBBI tracks the
  // instruction just examined so erasures by FixInvalidRegPairOp are safe.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
      int Offset = getMemoryOpOffset(*MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          // MemOps is kept ordered by ascending offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugInstr()) {
      // Debug instructions neither break nor extend a chain.
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(&*MBBI);
    }

    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  // Flush any chain still open when the scan reached the top of the block.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  llvm::sort(Candidates, LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // The multi-merge failed; still try folding a base update into each
        // individual load/store of the candidate.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}
1886 :
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restores the value of LR
/// into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any debug instructions.
    while (PrevI->isDebugInstr() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr &PrevMI = *PrevI;
    unsigned Opcode = PrevMI.getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list is the trailing operands; only merge when the last
      // restored register is LR (which the return then consumes).
      MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the LDM in place: new opcode, LR -> PC, and inherit the
      // return's implicit operands before deleting it.
      PrevMI.setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
      MBB.erase(MBBI);
      // We now restore LR into PC so it is not live-out of the return block
      // anymore: Clear the CSI Restored bit.
      MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
      // CSI should be fixed after PrologEpilog Insertion
      assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
      for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
        if (Info.getReg() == ARM::LR) {
          Info.setRestored(false);
          break;
        }
      }
      return true;
    }
  }
  return false;
}
1942 :
1943 0 : bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
1944 0 : MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1945 0 : if (MBBI == MBB.begin() || MBBI == MBB.end() ||
1946 0 : MBBI->getOpcode() != ARM::tBX_RET)
1947 0 : return false;
1948 :
1949 0 : MachineBasicBlock::iterator Prev = MBBI;
1950 : --Prev;
1951 0 : if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1952 0 : return false;
1953 :
1954 0 : for (auto Use : Prev->uses())
1955 0 : if (Use.isKill()) {
1956 : assert(STI->hasV4TOps());
1957 0 : BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
1958 0 : .addReg(Use.getReg(), RegState::Kill)
1959 0 : .add(predOps(ARMCC::AL))
1960 : .copyImplicitOps(*MBBI);
1961 0 : MBB.erase(MBBI);
1962 0 : MBB.erase(Prev);
1963 0 : return true;
1964 : }
1965 :
1966 0 : llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
1967 : }
1968 :
1969 13379 : bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1970 13379 : if (skipFunction(Fn.getFunction()))
1971 : return false;
1972 :
1973 13371 : MF = &Fn;
1974 13371 : STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1975 13371 : TL = STI->getTargetLowering();
1976 13371 : AFI = Fn.getInfo<ARMFunctionInfo>();
1977 13371 : TII = STI->getInstrInfo();
1978 13371 : TRI = STI->getRegisterInfo();
1979 :
1980 13371 : RegClassInfoValid = false;
1981 13371 : isThumb2 = AFI->isThumb2Function();
1982 25589 : isThumb1 = AFI->isThumbFunction() && !isThumb2;
1983 :
1984 : bool Modified = false;
1985 31768 : for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1986 : ++MFI) {
1987 : MachineBasicBlock &MBB = *MFI;
1988 18397 : Modified |= LoadStoreMultipleOpti(MBB);
1989 18397 : if (STI->hasV5TOps())
1990 14022 : Modified |= MergeReturnIntoLDM(MBB);
1991 18397 : if (isThumb1)
1992 2164 : Modified |= CombineMovBx(MBB);
1993 : }
1994 :
1995 13371 : Allocator.DestroyAll();
1996 13371 : return Modified;
1997 : }
1998 :
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  "ARM pre- register allocation load / store optimization pass"

namespace {

/// Pre- register allocation pass that move load / stores from consecutive
/// locations close to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  static char ID;

  // Analyses and per-function handles cached in runOnMachineFunction().
  AliasAnalysis *AA;
  const DataLayout *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override {
    return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Alias analysis is needed to decide when loads/stores may be reordered.
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  // Checks whether Op0/Op1 can be fused into a single LDRD/STRD; on success
  // fills in the out-parameters describing the new instruction.
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  // Moves the same-base loads/stores in Ops next to each other (or pairs
  // them into LDRD/STRD) inside MBB.
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  // Groups the block's loads/stores by base register and reschedules each
  // group between call/terminator barriers.
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};

} // end anonymous namespace
2045 :
char ARMPreAllocLoadStoreOpt::ID = 0;

// Register the pass with LLVM's pass registry under the command-line name
// "arm-prera-ldst-opt".
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2050 :
2051 13378 : bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2052 13378 : if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2053 27 : return false;
2054 :
2055 13351 : TD = &Fn.getDataLayout();
2056 13351 : STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
2057 13351 : TII = STI->getInstrInfo();
2058 13351 : TRI = STI->getRegisterInfo();
2059 13351 : MRI = &Fn.getRegInfo();
2060 13351 : MF = &Fn;
2061 13351 : AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2062 :
2063 : bool Modified = false;
2064 31923 : for (MachineBasicBlock &MFI : Fn)
2065 18572 : Modified |= RescheduleLoadStoreInstrs(&MFI);
2066 :
2067 : return Modified;
2068 : }
2069 :
/// Return true when the loads/stores in \p MemOps can be moved to a common
/// insertion point without changing behavior, and doing so is unlikely to
/// increase register pressure too much. \p I and \p E delimit the instruction
/// range being crossed, \p Base is the shared base register and \p MemRegs
/// the set of registers transferred by the memory ops.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI,
                                      AliasAnalysis *AA) {
  // Are there stores / loads / calls between them?
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    // Skip debug instructions and the memory ops being moved themselves.
    if (I->isDebugInstr() || MemOps.count(&*I))
      continue;
    // Calls, terminators and instructions with unmodeled side effects are
    // hard scheduling barriers.
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    // A store (or, when we are moving stores, any load) that may alias one
    // of the memory ops makes the reordering unsafe.
    if (I->mayStore() || (!isLd && I->mayLoad()))
      for (MachineInstr *MemOp : MemOps)
        if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
          return false;
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Redefining the base register in between would change the addresses
      // the moved ops access.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      // Every other register touched in the crossed range extends the live
      // ranges stretched by the move.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
2106 :
/// Determine whether the pair of single loads/stores \p Op0 and \p Op1 can be
/// fused into one LDRD/STRD (or the Thumb2 equivalents). On success the
/// out-parameters are filled with the new opcode, the two transfer registers,
/// the base register, the (possibly re-encoded) offset, predicate info, the
/// debug location and whether the Thumb2 form is used.
/// NOTE(review): only Op0's memory operand is inspected for volatility and
/// alignment; presumably Op1 has compatible properties by construction in the
/// caller — confirm against RescheduleOps.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function &Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(*Op0);
  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a signed, word-scaled 8-bit immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD/STRD use addrmode3: magnitude plus an add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct transfer registers.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(*Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
2181 :
/// Move the same-base, same-predicate loads/stores in \p Ops next to each
/// other so the post-RA pass can merge them; when exactly two ops qualify,
/// fuse them directly into an LDRD/STRD. \p Ops is consumed (emptied) as
/// groups are processed. Returns true if anything was moved or fused.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(*LHS);
    int ROffset = getMemoryOpOffset(*RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by descending offset, so iterating from the back visits
    // ascending offsets; collect a maximal contiguous run of like-kind ops.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      // Make sure each operation has the same kind.
      MachineInstr *Op = Ops[i];
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Check that we have a continuous set of offsets.
      int Offset = getMemoryOpOffset(*Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }

      // Don't try to reschedule too many instructions.
      if (NumMove == 8) // FIXME: Tune this limit.
        break;

      // Found a mergable instruction; save information about it.
      ++NumMove;
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;

      // Track the earliest and latest positions (per MI2LocMap) of the run.
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI, AA);
      if (!DoMove) {
        // Drop the whole run; none of these ops will be moved.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        // Loads move up to the earliest op, stores down to the latest, then
        // skip past the ops being moved (and debug instrs) themselves.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() &&
               (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.cloneMergedMemRefs({Op0, Op1});
            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.cloneMergedMemRefs({Op0, Op1});
            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing; just splice the run's instructions to InsertPos so
          // they end up adjacent for the post-RA merger.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
2348 :
/// Scan \p MBB in regions delimited by calls/terminators, group the
/// unpredicated loads and stores by base register, and reschedule each group
/// with more than one member. Returns true if anything changed.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // MI2LocMap gives every non-debug instruction a monotonically increasing
  // position so RescheduleOps can measure distances between ops.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    // Collect loads/stores until a barrier (call/terminator) or a duplicate
    // base+offset is seen; then reschedule what was collected.
    for (; MBBI != E; ++MBBI) {
      MachineInstr &MI = *MBBI;
      if (MI.isCall() || MI.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI.isDebugInstr())
        MI2LocMap[&MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unconditional memory ops are considered.
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI.getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI.getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access to the same base+offset ends the region: the two
          // accesses must not be reordered relative to each other.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(*BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(&MI);
        } else {
          Base2LdsMap[Base].push_back(&MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(*BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(&MI);
        } else {
          Base2StsMap[Base].push_back(&MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Reset the per-region grouping state before scanning the next region.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
2454 :
2455 : /// Returns an instance of the load / store optimization pass.
2456 5138 : FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2457 5138 : if (PreAlloc)
2458 5138 : return new ARMPreAllocLoadStoreOpt();
2459 2569 : return new ARMLoadStoreOpt();
2460 : }
|