//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool> AssumeMisalignedLoadStores(
    "arm-assume-misaligned-load-store", cl::Hidden, cl::init(false),
    cl::desc("Be more conservative in ARM load/store opt"));
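// Being a hidden cl::opt, this can be enabled from the command line, e.g.
// "llc -arm-assume-misaligned-load-store ..." or, from clang,
// "-mllvm -arm-assume-misaligned-load-store"; it is off by default.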

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {

  /// Post-register allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LiveRegUnits LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &Fn) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }

  private:
    /// A set of load/store MachineInstrs with the same base register sorted
    /// by offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.

      MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
          : MI(&MI), Offset(Offset), Position(Position) {}
    };
    using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;

      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;

      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;

      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;

      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;

      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(
        MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
        int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
        ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
        ArrayRef<std::pair<unsigned, bool>> Regs,
        ArrayRef<MachineInstr*> Instrs);
    MachineInstr *CreateLoadStoreDouble(
        MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
        int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
        ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
        ArrayRef<std::pair<unsigned, bool>> Regs,
        ArrayRef<MachineInstr*> Instrs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };

} // end anonymous namespace

char ARMLoadStoreOpt::ID = 0;

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                false)

static bool definesCPSR(const MachineInstr &MI) {
  for (const auto &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

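/// Decode the signed byte offset of a load/store from its immediate operand.
/// For example, "LDR r0, [r1, #8]" (ARM::LDRi12) yields 8, a Thumb1 tLDRi
/// with immediate 2 yields 8 (word-scaled), and an AM3/AM5 offset field with
/// the "sub" bit set yields a negative value.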
static int getMemoryOpOffset(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI.getDesc().getNumOperands();
  unsigned OffField = MI.getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

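/// Map a single load/store opcode and an addressing submode to the matching
/// load/store-multiple opcode; e.g. (ARM::LDRi12, ARM_AM::ia) yields
/// ARM::LDMIA. Returns 0 where only a writeback variant exists (the VLDM/VSTM
/// "db" forms). Also bumps the statistics counters as a side effect.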
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

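/// Scale that converts a word offset (4-byte units) into the immediate units
/// of the given Thumb1 opcode: tLDRi/tSTRi immediates count words (scale 1),
/// tLDRHi/tSTRHi count halfwords (scale 2), tLDRBi/tSTRBi count bytes
/// (scale 4).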
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
      int Offset;
      bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(*MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
          .add(t1CondCodeOp(true))
          .addReg(Base)
          .addImm(WordOffset * 4)
          .addImm(Pred)
          .addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
        MBBI->definesRegister(Base, /*TRI=*/nullptr))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (!MBB.succ_empty()) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
        .add(t1CondCodeOp(true))
        .addReg(Base)
        .addImm(WordOffset * 4)
        .addImm(Pred)
        .addReg(PredReg);
  }
}
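// For example, with WordOffset == 2 a later "tLDRi r3, [r0, #2]" (word-scaled,
// i.e. byte offset 8) is rewritten to "[r0, #0]"; where an offset would go
// negative, a compensating "SUBS r0, #8" is inserted instead.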

/// Return the first register of class \p RegClass that is not currently live.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(*TRI);
    LiveRegs.addLiveOuts(MBB);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction if the transformation is done, or nullptr
/// otherwise.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs,
    ArrayRef<MachineInstr*> Instrs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi)
      Writeback = false;
    else if (Opcode == ARM::tSTRi)
      return nullptr;
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
                                                          : ARM::t2ADDri)
                  : (isThumb1 && Base == ARM::SP)
                      ? ARM::tADDrSPi
                      : (isThumb1 && Offset < 8)
                            ? ARM::tADDi3
                            : isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      // FIXME: There are no Thumb1 load/store instructions with negative
      // offsets. So the Base != ARM::SP might be unnecessary.
      Offset = -Offset;
      BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
                                                        : ARM::t2SUBri)
                : (isThumb1 && Offset < 8 && Base != ARM::SP)
                    ? ARM::tSUBi3
                    : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .add(predOps(Pred, PredReg));

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase))
          .addImm(Offset / 4)
          .add(predOps(Pred, PredReg));
      } else
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .add(t1CondCodeOp(true))
          .addReg(Base, getKillRegState(KillOldBase))
          .addImm(Offset)
          .add(predOps(Pred, PredReg));
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase))
        .addImm(Offset)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp());
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  MIB.cloneMergedMemRefs(Instrs);

  return MIB.getInstr();
}
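// For example, "ldr r0, [r4]" and "ldr r1, [r4, #4]" with Offset == 0 merge
// to "ldm r4, {r0, r1}"; with a nonzero starting offset and at least three
// registers, a new base is first materialized with an ADD/SUB as above.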

MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs,
    ArrayRef<MachineInstr*> Instrs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  MIB.cloneMergedMemRefs(Instrs);
  return MIB.getInstr();
}
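// For example, Thumb2 loads "t2LDRi12 r0, [r2]" and "t2LDRi12 r1, [r2, #4]"
// become a single "t2LDRDi8 r0, r1, [r2]".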

/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    Register Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        Register DefReg = MO.getReg();

        if (is_contained(ImpDefs, DefReg))
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  using iterator = MachineBasicBlock::iterator;

  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  int Offset = getMemoryOpOffset(*First);
  Register Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs,
                                   Cand.Instrs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
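// That is, the legal t2LDRDi8/t2STRDi8 offsets are the multiples of 4 in the
// range [-1020, 1020].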

/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;

  // Stack pointer alignment is out of the programmer's control so we can trust
  // SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
    return true;
  return false;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    Register PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm that start with an odd register number, as
    // that needs more uops than single vldrs.
    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // vldm / vstm limit are 32 for S variants, 16 for D variants.
    unsigned Limit;
    switch (Opcode) {
    default:
      Limit = UINT_MAX;
      break;
    case ARM::VLDRD:
    case ARM::VSTRD:
      Limit = 16;
      break;
    }

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      Register Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;
      if (Count == Limit)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
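// For example, "ldr r0, [r3]; ldr r1, [r3, #4]; ldr r2, [r3, #8]" forms a
// single candidate with CanMergeToLSMulti set (ascending registers at
// consecutive offsets); on Thumb2 only the first two loads could instead
// qualify for CanMergeToLSDouble.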

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
                                  ARMCC::CondCodes Pred, Register PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::t2SUBspImm:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::t2ADDspImm:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  Register MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
                 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
                ARMCC::CondCodes Pred, Register PredReg, int &Offset,
                const TargetRegisterInfo *TRI) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  while (NextMBBI != EndMBBI) {
    // Skip debug values.
    while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
      ++NextMBBI;
    if (NextMBBI == EndMBBI)
      return EndMBBI;

    int Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
    if (Off) {
      Offset = Off;
      return NextMBBI;
    }

    // SP can only be combined if it is the next instruction after the original
    // MBBI, otherwise we may be incrementing the stack pointer (invalidating
    // anything below the new pointer) when its frame elements are still in
    // use. Other registers can attempt to look further, until a different use
    // or def of the register is found.
    if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
        NextMBBI->definesRegister(Reg, TRI))
      return EndMBBI;

    ++NextMBBI;
  }
  return EndMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

  const MachineOperand &BaseOP = MI->getOperand(0);
  Register Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
    if (MO.getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
      = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {

      // We couldn't find an inc/dec to merge. But if the base is dead, we
      // can still change to a writeback form as that will save us 2 bytes
      // of code size. It can create WAW hazards though, so only do it if
      // we're minimizing code size.
      if (!STI->hasMinSize() || !BaseKill)
        return false;

      bool HighRegsUsed = false;
      for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
        if (MO.getReg() >= ARM::R8) {
          HighRegsUsed = true;
          break;
        }

      if (!HighRegsUsed)
        MergeInstr = MBB.end();
      else
        return false;
    }
  }
  if (MergeInstr != MBB.end()) {
    LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
    MBB.erase(MergeInstr);
  }

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  LLVM_DEBUG(dbgs() << "  Added new load/store: " << *MIB);
  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2LDRBi8:
  case ARM::t2LDRBi12:
    return ARM::t2LDRB_POST;
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBi12:
    return ARM::t2LDRSB_POST;
  case ARM::t2LDRHi8:
  case ARM::t2LDRHi12:
    return ARM::t2LDRH_POST;
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHi12:
    return ARM::t2LDRSH_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  case ARM::t2STRBi8:
  case ARM::t2STRBi12:
    return ARM::t2STRB_POST;
  case ARM::t2STRHi8:
  case ARM::t2STRHi12:
    return ARM::t2STRH_POST;

  case ARM::MVE_VLDRBS16:
    return ARM::MVE_VLDRBS16_post;
  case ARM::MVE_VLDRBS32:
    return ARM::MVE_VLDRBS32_post;
  case ARM::MVE_VLDRBU16:
    return ARM::MVE_VLDRBU16_post;
  case ARM::MVE_VLDRBU32:
    return ARM::MVE_VLDRBU32_post;
  case ARM::MVE_VLDRHS32:
    return ARM::MVE_VLDRHS32_post;
  case ARM::MVE_VLDRHU32:
    return ARM::MVE_VLDRHU32_post;
  case ARM::MVE_VLDRBU8:
    return ARM::MVE_VLDRBU8_post;
  case ARM::MVE_VLDRHU16:
    return ARM::MVE_VLDRHU16_post;
  case ARM::MVE_VLDRWU32:
    return ARM::MVE_VLDRWU32_post;
  case ARM::MVE_VSTRB16:
    return ARM::MVE_VSTRB16_post;
  case ARM::MVE_VSTRB32:
    return ARM::MVE_VSTRB32_post;
  case ARM::MVE_VSTRH32:
    return ARM::MVE_VSTRH32_post;
  case ARM::MVE_VSTRBU8:
    return ARM::MVE_VSTRBU8_post;
  case ARM::MVE_VSTRHU16:
    return ARM::MVE_VSTRHU16_post;
  case ARM::MVE_VSTRWU32:
    return ARM::MVE_VSTRWU32_post;

  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

  Register Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
      = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (MergeInstr == MBB.end())
      return false;

    NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    if ((isAM5 && Offset != Bytes) ||
        (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
      if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
        return false;
    }
  }
  LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
                   .addReg(Base, getDefRegState(true)) // WB base register
                   .addReg(Base, getKillRegState(isLd ? BaseKill : false))
                   .addImm(Pred)
                   .addReg(PredReg)
                   .addReg(MO.getReg(), (isLd ? getDefRegState(true)
                                              : getKillRegState(MO.isKill())))
                   .cloneMemRefs(*MI);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        auto MIB =
            BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                .addReg(Base, RegState::Define)
                .addReg(Base)
                .addImm(Offset)
                .addImm(Pred)
                .addReg(PredReg)
                .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
        auto MIB =
            BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                .addReg(Base, RegState::Define)
                .addReg(Base)
                .addReg(0)
                .addImm(Imm)
                .add(predOps(Pred, PredReg))
                .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      auto MIB =
          BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
              .addReg(Base, RegState::Define)
              .addReg(Base)
              .addImm(Offset)
              .add(predOps(Pred, PredReg))
              .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
      // STR_PRE, STR_POST
      auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                     .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                     .addReg(Base)
                     .addReg(0)
                     .addImm(Imm)
                     .add(predOps(Pred, PredReg))
                     .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    } else {
      // t2STR_PRE, t2STR_POST
      auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                     .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                     .addReg(Base)
                     .addImm(Offset)
                     .add(predOps(Pred, PredReg))
                     .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    }
  }
  MBB.erase(MBBI);

  return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  Register Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (MergeInstr == MBB.end())
      return false;
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    if (!isLegalAddressImm(NewOpc, Offset, TII))
      return false;
  }
  LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.add(MO);
  MIB.cloneMemRefs(MI);

  LLVM_DEBUG(dbgs() << "  Added new load/store: " << *MIB);
  MBB.erase(MBBI);
  return true;
}
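// For example, "t2LDRDi8 r0, r1, [r2]" preceded by a decrement of r2 by 8
// becomes "t2LDRD_PRE r0, r1, [r2, #-8]!"; an increment found after it
// selects the post-indexed t2LDRD_POST form instead.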

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  // TODO: We could allow unordered and monotonic atomics here, but we need to
  // make sure the resulting ldm/stm is correctly marked as atomic.
  if (MMO.isVolatile() || MMO.isAtomic())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlign() < Align(4))
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}
1732
1733 static void InsertLDR_STR(MachineBasicBlock &MBB,
1734 MachineBasicBlock::iterator &MBBI, int Offset,
1735 bool isDef, unsigned NewOpc, unsigned Reg,
1736 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1737 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1738 unsigned PredReg, const TargetInstrInfo *TII,
1739 MachineInstr *MI) {
1740 if (isDef) {
1741 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1742 TII->get(NewOpc))
1743 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1744 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1745 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1746 // FIXME: This is overly conservative; the new instruction accesses 4
1747 // bytes, not 8.
1748 MIB.cloneMemRefs(*MI);
1749 } else {
1750 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1751 TII->get(NewOpc))
1752 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1753 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1754 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1755 // FIXME: This is overly conservative; the new instruction accesses 4
1756 // bytes, not 8.
1757 MIB.cloneMemRefs(*MI);
1758 }
1759}
1760
1761bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1762 MachineBasicBlock::iterator &MBBI) {
1763 MachineInstr *MI = &*MBBI;
1764 unsigned Opcode = MI->getOpcode();
1765 // FIXME: Code/comments below also handle Opcode == t2STRDi8, but this check
1766 // returns early when we see that opcode, so those paths are never taken.
1767 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1768 return false;
1769
1770 const MachineOperand &BaseOp = MI->getOperand(2);
1771 Register BaseReg = BaseOp.getReg();
1772 Register EvenReg = MI->getOperand(0).getReg();
1773 Register OddReg = MI->getOperand(1).getReg();
1774 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1775 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1776
1777 // ARM errata 602117: LDRD with base in list may result in incorrect base
1778 // register when interrupted or faulted.
1779 bool Errata602117 = EvenReg == BaseReg &&
1780 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1781 // ARM LDRD/STRD needs consecutive registers.
1782 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1783 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1784
1785 if (!Errata602117 && !NonConsecutiveRegs)
1786 return false;
1787
1788 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1789 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1790 bool EvenDeadKill = isLd ?
1791 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1792 bool EvenUndef = MI->getOperand(0).isUndef();
1793 bool OddDeadKill = isLd ?
1794 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1795 bool OddUndef = MI->getOperand(1).isUndef();
1796 bool BaseKill = BaseOp.isKill();
1797 bool BaseUndef = BaseOp.isUndef();
1798 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1799 "register offset not handled below");
1800 int OffImm = getMemoryOpOffset(*MI);
1801 Register PredReg;
1802 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1803
1804 if (OddRegNum > EvenRegNum && OffImm == 0) {
1805 // Ascending register numbers and no offset. It's safe to change it to a
1806 // ldm or stm.
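// E.g. (a sketch with hypothetical registers): LDRD r1, r2, [r4] has an odd
// first register, which LDRD cannot encode, but the same access is valid as
// LDMIA r4, {r1, r2}.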
1807 unsigned NewOpc = (isLd)
1808 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1809 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1810 if (isLd) {
1811 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1812 .addReg(BaseReg, getKillRegState(BaseKill))
1813 .addImm(Pred).addReg(PredReg)
1814 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1815 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1816 .cloneMemRefs(*MI);
1817 ++NumLDRD2LDM;
1818 } else {
1819 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1820 .addReg(BaseReg, getKillRegState(BaseKill))
1821 .addImm(Pred).addReg(PredReg)
1822 .addReg(EvenReg,
1823 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1824 .addReg(OddReg,
1825 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1826 .cloneMemRefs(*MI);
1827 ++NumSTRD2STM;
1828 }
1829 } else {
1830 // Split into two instructions.
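// E.g. (a sketch with hypothetical registers): LDRD r3, r2, [r4, #8] has
// descending register numbers, so it is rewritten as
//   LDR r3, [r4, #8]
//   LDR r2, [r4, #12]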
1831 unsigned NewOpc = (isLd)
1832 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1833 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1834 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1835 // so adjust and use t2LDRi12 here for that.
1836 unsigned NewOpc2 = (isLd)
1837 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1838 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1839 // If this is a load, make sure the first load does not clobber the base
1840 // register before the second load reads it.
1841 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1842 assert(!TRI->regsOverlap(OddReg, BaseReg));
1843 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1844 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1845 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1846 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1847 MI);
1848 } else {
1849 if (OddReg == EvenReg && EvenDeadKill) {
1850 // If the two source operands are the same, the kill marker is
1851 // probably on the first one. e.g.
1852 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1853 EvenDeadKill = false;
1854 OddDeadKill = true;
1855 }
1856 // Never kill the base register in the first instruction.
1857 if (EvenReg == BaseReg)
1858 EvenDeadKill = false;
1859 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1860 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1861 MI);
1862 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1863 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1864 MI);
1865 }
1866 if (isLd)
1867 ++NumLDRD2LDR;
1868 else
1869 ++NumSTRD2STR;
1870 }
1871
1872 MBBI = MBB.erase(MBBI);
1873 return true;
1874}
1875
1876/// An optimization pass to turn multiple LDR / STR ops of the same base and
1877/// incrementing offset into LDM / STM ops.
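/// For example (an illustrative sketch, hypothetical registers):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]    =>    ldm r0, {r1, r2, r3}
///   ldr r3, [r0, #8]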
1878bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1879 MemOpQueue MemOps;
1880 unsigned CurrBase = 0;
1881 unsigned CurrOpc = ~0u;
1882 ARMCC::CondCodes CurrPred = ARMCC::AL;
1883 unsigned Position = 0;
1884 assert(Candidates.size() == 0);
1885 assert(MergeBaseCandidates.size() == 0);
1886 LiveRegsValid = false;
1887
1888 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1889 I = MBBI) {
1890 // The instruction in front of the iterator is the one we look at.
1891 MBBI = std::prev(I);
1892 if (FixInvalidRegPairOp(MBB, MBBI))
1893 continue;
1894 ++Position;
1895
1896 if (isMemoryOp(*MBBI)) {
1897 unsigned Opcode = MBBI->getOpcode();
1898 const MachineOperand &MO = MBBI->getOperand(0);
1899 Register Reg = MO.getReg();
1900 Register Base = getLoadStoreBaseOp(*MBBI).getReg();
1901 Register PredReg;
1902 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1903 int Offset = getMemoryOpOffset(*MBBI);
1904 if (CurrBase == 0) {
1905 // Start of a new chain.
1906 CurrBase = Base;
1907 CurrOpc = Opcode;
1908 CurrPred = Pred;
1909 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1910 continue;
1911 }
1912 // Note: No need to match PredReg in the next if.
1913 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1914 // Watch out for:
1915 // r4 := ldr [r0, #8]
1916 // r4 := ldr [r0, #4]
1917 // or
1918 // r0 := ldr [r0]
1919 // If a load overrides the base register or a register loaded by
1920 // another load in our chain, we cannot take this instruction.
1921 bool Overlap = false;
1922 if (isLoadSingle(Opcode)) {
1923 Overlap = (Base == Reg);
1924 if (!Overlap) {
1925 for (const MemOpQueueEntry &E : MemOps) {
1926 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1927 Overlap = true;
1928 break;
1929 }
1930 }
1931 }
1932 }
1933
1934 if (!Overlap) {
1935 // Check offset and sort memory operation into the current chain.
1936 if (Offset > MemOps.back().Offset) {
1937 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1938 continue;
1939 } else {
1940 MemOpQueue::iterator MI, ME;
1941 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1942 if (Offset < MI->Offset) {
1943 // Found a place to insert.
1944 break;
1945 }
1946 if (Offset == MI->Offset) {
1947 // Collision, abort.
1948 MI = ME;
1949 break;
1950 }
1951 }
1952 if (MI != MemOps.end()) {
1953 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1954 continue;
1955 }
1956 }
1957 }
1958 }
1959
1960 // Don't advance the iterator; the op will start a new chain next.
1961 MBBI = I;
1962 --Position;
1963 // Fallthrough to look into existing chain.
1964 } else if (MBBI->isDebugInstr()) {
1965 continue;
1966 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1967 MBBI->getOpcode() == ARM::t2STRDi8) {
1968 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
1969 // remember them because we may still be able to merge add/sub into them.
1970 MergeBaseCandidates.push_back(&*MBBI);
1971 }
1972
1973 // If we are here then the chain is broken; extract candidates for a merge.
1974 if (MemOps.size() > 0) {
1975 FormCandidates(MemOps);
1976 // Reset for the next chain.
1977 CurrBase = 0;
1978 CurrOpc = ~0u;
1979 CurrPred = ARMCC::AL;
1980 MemOps.clear();
1981 }
1982 }
1983 if (MemOps.size() > 0)
1984 FormCandidates(MemOps);
1985
1986 // Sort candidates so they get processed from the end to the beginning of the
1987 // basic block later; this is necessary for liveness calculation.
1988 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1989 return M0->InsertPos < M1->InsertPos;
1990 };
1991 llvm::sort(Candidates, LessThan);
1992
1993 // Go through list of candidates and merge.
1994 bool Changed = false;
1995 for (const MergeCandidate *Candidate : Candidates) {
1996 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1997 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1998 // Merge preceding/trailing base inc/dec into the merged op.
1999 if (Merged) {
2000 Changed = true;
2001 unsigned Opcode = Merged->getOpcode();
2002 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2003 MergeBaseUpdateLSDouble(*Merged);
2004 else
2005 MergeBaseUpdateLSMultiple(Merged);
2006 } else {
2007 for (MachineInstr *MI : Candidate->Instrs) {
2008 if (MergeBaseUpdateLoadStore(MI))
2009 Changed = true;
2010 }
2011 }
2012 } else {
2013 assert(Candidate->Instrs.size() == 1);
2014 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2015 Changed = true;
2016 }
2017 }
2018 Candidates.clear();
2019 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2020 for (MachineInstr *MI : MergeBaseCandidates)
2021 MergeBaseUpdateLSDouble(*MI);
2022 MergeBaseCandidates.clear();
2023
2024 return Changed;
2025}
2026
2027 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2028 /// into the preceding stack restore so it directly restores the value of LR
2029 /// into pc.
2030/// ldmfd sp!, {..., lr}
2031/// bx lr
2032/// or
2033/// ldmfd sp!, {..., lr}
2034/// mov pc, lr
2035/// =>
2036/// ldmfd sp!, {..., pc}
2037bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2038 // Thumb1 LDM doesn't allow high registers.
2039 if (isThumb1) return false;
2040 if (MBB.empty()) return false;
2041
2043 if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2044 (MBBI->getOpcode() == ARM::BX_RET ||
2045 MBBI->getOpcode() == ARM::tBX_RET ||
2046 MBBI->getOpcode() == ARM::MOVPCLR)) {
2047 MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2048 // Ignore any debug instructions.
2049 while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2050 --PrevI;
2051 MachineInstr &PrevMI = *PrevI;
2052 unsigned Opcode = PrevMI.getOpcode();
2053 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2054 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2055 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2056 MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2057 if (MO.getReg() != ARM::LR)
2058 return false;
2059 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2060 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2061 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
2062 PrevMI.setDesc(TII->get(NewOpc));
2063 MO.setReg(ARM::PC);
2064 PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2065 MBB.erase(MBBI);
2066 return true;
2067 }
2068 }
2069 return false;
2070}
2071
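/// Fold "mov lr, rN; bx lr" into a direct "bx rN" on Thumb1. An illustrative
/// sketch (hypothetical register):
///   tMOVr lr, killed r3
///   tBX_RET               =>   tBX killed r3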
2072bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2073 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
2074 if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2075 MBBI->getOpcode() != ARM::tBX_RET)
2076 return false;
2077
2078 MachineBasicBlock::iterator Prev = MBBI;
2079 --Prev;
2080 if (Prev->getOpcode() != ARM::tMOVr ||
2081 !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2082 return false;
2083
2084 for (auto Use : Prev->uses())
2085 if (Use.isKill()) {
2086 assert(STI->hasV4TOps());
2087 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2088 .addReg(Use.getReg(), RegState::Kill)
2089 .add(predOps(ARMCC::AL))
2090 .copyImplicitOps(*MBBI);
2091 MBB.erase(MBBI);
2092 MBB.erase(Prev);
2093 return true;
2094 }
2095
2096 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2097}
2098
2099bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2100 if (skipFunction(Fn.getFunction()))
2101 return false;
2102
2103 MF = &Fn;
2104 STI = &Fn.getSubtarget<ARMSubtarget>();
2105 TL = STI->getTargetLowering();
2106 AFI = Fn.getInfo<ARMFunctionInfo>();
2107 TII = STI->getInstrInfo();
2108 TRI = STI->getRegisterInfo();
2109
2110 RegClassInfoValid = false;
2111 isThumb2 = AFI->isThumb2Function();
2112 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2113
2114 bool Modified = false, ModifiedLDMReturn = false;
2115 for (MachineBasicBlock &MBB : Fn) {
2116 Modified |= LoadStoreMultipleOpti(MBB);
2117 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2118 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2119 if (isThumb1)
2120 Modified |= CombineMovBx(MBB);
2121 }
2122 Modified |= ModifiedLDMReturn;
2123
2124 // If we merged a BX instruction into an LDM, we need to re-calculate whether
2125 // LR is restored. This check needs to consider the whole function, not just
2126 // the instruction(s) we changed, because there may be other BX returns which
2127 // still need LR to be restored.
2128 if (ModifiedLDMReturn)
2129 ARMFrameLowering::updateLRRestored(Fn);
2130
2131 Allocator.DestroyAll();
2132 return Modified;
2133}
2134
2135#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2136 "ARM pre- register allocation load / store optimization pass"
2137
2138namespace {
2139
2140 /// Pre- register allocation pass that moves loads / stores from consecutive
2141 /// locations closer together to make it more likely they will be combined later.
2142 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
2143 static char ID;
2144
2145 AliasAnalysis *AA;
2146 const DataLayout *TD;
2147 const TargetInstrInfo *TII;
2148 const TargetRegisterInfo *TRI;
2149 const ARMSubtarget *STI;
2150 MachineRegisterInfo *MRI;
2151 MachineDominatorTree *DT;
2152 MachineFunction *MF;
2153
2154 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
2155
2156 bool runOnMachineFunction(MachineFunction &Fn) override;
2157
2158 StringRef getPassName() const override {
2159 return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
2160 }
2161
2162 void getAnalysisUsage(AnalysisUsage &AU) const override {
2163 AU.addRequired<AAResultsWrapperPass>();
2164 AU.addRequired<MachineDominatorTreeWrapperPass>();
2165 AU.addPreserved<MachineDominatorTreeWrapperPass>();
2166 MachineFunctionPass::getAnalysisUsage(AU);
2167 }
2168
2169 private:
2170 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2171 unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2172 Register &BaseReg, int &Offset, Register &PredReg,
2173 ARMCC::CondCodes &Pred, bool &isT2);
2174 bool RescheduleOps(
2175 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops,
2176 unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2177 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap);
2178 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2179 bool DistributeIncrements();
2180 bool DistributeIncrements(Register Base);
2181 };
2182
2183} // end anonymous namespace
2184
2185char ARMPreAllocLoadStoreOpt::ID = 0;
2186
2187INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2188 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2189 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2190INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2191 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2192
2193// Limit the number of instructions to be rescheduled.
2194// FIXME: tune this limit, and/or come up with some better heuristics.
2195static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2196 cl::init(8), cl::Hidden);
2197
2198bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2199 if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2200 return false;
2201
2202 TD = &Fn.getDataLayout();
2203 STI = &Fn.getSubtarget<ARMSubtarget>();
2204 TII = STI->getInstrInfo();
2205 TRI = STI->getRegisterInfo();
2206 MRI = &Fn.getRegInfo();
2207 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2208 MF = &Fn;
2209 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2210
2211 bool Modified = DistributeIncrements();
2212 for (MachineBasicBlock &MFI : Fn)
2213 Modified |= RescheduleLoadStoreInstrs(&MFI);
2214
2215 return Modified;
2216}
2217
2218static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2219 MachineBasicBlock::iterator I,
2220 MachineBasicBlock::iterator E,
2221 SmallPtrSetImpl<MachineInstr *> &MemOps,
2222 SmallSet<unsigned, 4> &MemRegs,
2223 const TargetRegisterInfo *TRI,
2224 AliasAnalysis *AA) {
2225 // Are there stores / loads / calls between them?
2226 SmallSet<unsigned, 4> AddedRegPressure;
2227 while (++I != E) {
2228 if (I->isDebugInstr() || MemOps.count(&*I))
2229 continue;
2230 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2231 return false;
2232 if (I->mayStore() || (!isLd && I->mayLoad()))
2233 for (MachineInstr *MemOp : MemOps)
2234 if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2235 return false;
2236 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2237 MachineOperand &MO = I->getOperand(j);
2238 if (!MO.isReg())
2239 continue;
2240 Register Reg = MO.getReg();
2241 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2242 return false;
2243 if (Reg != Base && !MemRegs.count(Reg))
2244 AddedRegPressure.insert(Reg);
2245 }
2246 }
2247
2248 // Estimate register pressure increase due to the transformation.
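// E.g. (a sketch): moving six loads (MemRegs.size() == 6) is still considered
// profitable as long as at most twelve unrelated registers appear in the
// instruction range being crossed.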
2249 if (MemRegs.size() <= 4)
2250 // OK if we are moving a small number of instructions.
2251 return true;
2252 return AddedRegPressure.size() <= MemRegs.size() * 2;
2253}
2254
2255bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2256 MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2257 Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2258 Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2259 // Make sure we're allowed to generate LDRD/STRD.
2260 if (!STI->hasV5TEOps())
2261 return false;
2262
2263 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2264 unsigned Scale = 1;
2265 unsigned Opcode = Op0->getOpcode();
2266 if (Opcode == ARM::LDRi12) {
2267 NewOpc = ARM::LDRD;
2268 } else if (Opcode == ARM::STRi12) {
2269 NewOpc = ARM::STRD;
2270 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2271 NewOpc = ARM::t2LDRDi8;
2272 Scale = 4;
2273 isT2 = true;
2274 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2275 NewOpc = ARM::t2STRDi8;
2276 Scale = 4;
2277 isT2 = true;
2278 } else {
2279 return false;
2280 }
2281
2282 // Make sure the base address satisfies i64 ld / st alignment requirement.
2283 // At the moment, we ignore the memory operand's value.
2284 // If we want to use AliasAnalysis, we should check it accordingly.
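// E.g. (a sketch): two word accesses at [r0] and [r0, #4] only pair into an
// LDRD/STRD when the access is aligned to the subtarget's dual load/store
// requirement (8 bytes on some cores); otherwise we bail out here.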
2285 if (!Op0->hasOneMemOperand() ||
2286 (*Op0->memoperands_begin())->isVolatile() ||
2287 (*Op0->memoperands_begin())->isAtomic())
2288 return false;
2289
2290 Align Alignment = (*Op0->memoperands_begin())->getAlign();
2291 Align ReqAlign = STI->getDualLoadStoreAlignment();
2292 if (Alignment < ReqAlign)
2293 return false;
2294
2295 // Then make sure the immediate offset fits.
2296 int OffImm = getMemoryOpOffset(*Op0);
2297 if (isT2) {
2298 int Limit = (1 << 8) * Scale;
2299 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2300 return false;
2301 Offset = OffImm;
2302 } else {
2303 ARM_AM::AddSub AddSub = ARM_AM::add;
2304 if (OffImm < 0) {
2305 AddSub = ARM_AM::sub;
2306 OffImm = - OffImm;
2307 }
2308 int Limit = (1 << 8) * Scale;
2309 if (OffImm >= Limit || (OffImm & (Scale-1)))
2310 return false;
2311 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2312 }
2313 FirstReg = Op0->getOperand(0).getReg();
2314 SecondReg = Op1->getOperand(0).getReg();
2315 if (FirstReg == SecondReg)
2316 return false;
2317 BaseReg = Op0->getOperand(1).getReg();
2318 Pred = getInstrPredicate(*Op0, PredReg);
2319 dl = Op0->getDebugLoc();
2320 return true;
2321}
2322
2323bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2324 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2325 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2326 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2327 bool RetVal = false;
2328
2329 // Sort by offset (in reverse order).
2330 llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2331 int LOffset = getMemoryOpOffset(*LHS);
2332 int ROffset = getMemoryOpOffset(*RHS);
2333 assert(LHS == RHS || LOffset != ROffset);
2334 return LOffset > ROffset;
2335 });
2336
2337 // The loads / stores of the same base are in order. Scan them from first to
2338 // last and check for the following:
2339 // 1. Any def of base.
2340 // 2. Any gaps.
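// E.g. (a sketch): word accesses at offsets 0, 4, 8 form one continuous run,
// whereas offsets 0, 4, 12 stop the scan at the gap after 4.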
2341 while (Ops.size() > 1) {
2342 unsigned FirstLoc = ~0U;
2343 unsigned LastLoc = 0;
2344 MachineInstr *FirstOp = nullptr;
2345 MachineInstr *LastOp = nullptr;
2346 int LastOffset = 0;
2347 unsigned LastOpcode = 0;
2348 unsigned LastBytes = 0;
2349 unsigned NumMove = 0;
2350 for (MachineInstr *Op : llvm::reverse(Ops)) {
2351 // Make sure each operation has the same kind.
2352 unsigned LSMOpcode
2353 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2354 if (LastOpcode && LSMOpcode != LastOpcode)
2355 break;
2356
2357 // Check that we have a continuous set of offsets.
2358 int Offset = getMemoryOpOffset(*Op);
2359 unsigned Bytes = getLSMultipleTransferSize(Op);
2360 if (LastBytes) {
2361 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2362 break;
2363 }
2364
2365 // Don't try to reschedule too many instructions.
2366 if (NumMove == InstReorderLimit)
2367 break;
2368
2369 // Found a mergable instruction; save information about it.
2370 ++NumMove;
2371 LastOffset = Offset;
2372 LastBytes = Bytes;
2373 LastOpcode = LSMOpcode;
2374
2375 unsigned Loc = MI2LocMap[Op];
2376 if (Loc <= FirstLoc) {
2377 FirstLoc = Loc;
2378 FirstOp = Op;
2379 }
2380 if (Loc >= LastLoc) {
2381 LastLoc = Loc;
2382 LastOp = Op;
2383 }
2384 }
2385
2386 if (NumMove <= 1)
2387 Ops.pop_back();
2388 else {
2389 SmallPtrSet<MachineInstr*, 4> MemOps;
2390 SmallSet<unsigned, 4> MemRegs;
2391 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2392 MemOps.insert(Ops[i]);
2393 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2394 }
2395
2396 // Be conservative, if the instructions are too far apart, don't
2397 // move them. We want to limit the increase of register pressure.
2398 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2399 if (DoMove)
2400 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2401 MemOps, MemRegs, TRI, AA);
2402 if (!DoMove) {
2403 for (unsigned i = 0; i != NumMove; ++i)
2404 Ops.pop_back();
2405 } else {
2406 // This is the new location for the loads / stores.
2407 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2408 while (InsertPos != MBB->end() &&
2409 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2410 ++InsertPos;
2411
2412 // If we are moving a pair of loads / stores, see if it makes sense
2413 // to try to allocate a pair of registers that can form register pairs.
2414 MachineInstr *Op0 = Ops.back();
2415 MachineInstr *Op1 = Ops[Ops.size()-2];
2416 Register FirstReg, SecondReg;
2417 Register BaseReg, PredReg;
2418 ARMCC::CondCodes Pred = ARMCC::AL;
2419 bool isT2 = false;
2420 unsigned NewOpc = 0;
2421 int Offset = 0;
2422 DebugLoc dl;
2423 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2424 FirstReg, SecondReg, BaseReg,
2425 Offset, PredReg, Pred, isT2)) {
2426 Ops.pop_back();
2427 Ops.pop_back();
2428
2429 const MCInstrDesc &MCID = TII->get(NewOpc);
2430 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2431 MRI->constrainRegClass(FirstReg, TRC);
2432 MRI->constrainRegClass(SecondReg, TRC);
2433
2434 // Form the pair instruction.
2435 if (isLd) {
2436 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2437 .addReg(FirstReg, RegState::Define)
2438 .addReg(SecondReg, RegState::Define)
2439 .addReg(BaseReg);
2440 // FIXME: We're converting from LDRi12 to an insn that still
2441 // uses addrmode2, so we need an explicit offset reg. It should
2442 // always be reg0 since we're transforming LDRi12s.
2443 if (!isT2)
2444 MIB.addReg(0);
2445 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2446 MIB.cloneMergedMemRefs({Op0, Op1});
2447 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2448 ++NumLDRDFormed;
2449 } else {
2450 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2451 .addReg(FirstReg)
2452 .addReg(SecondReg)
2453 .addReg(BaseReg);
2454 // FIXME: We're converting from LDRi12 to an insn that still
2455 // uses addrmode2, so we need an explicit offset reg. It should
2456 // always be reg0 since we're transforming STRi12s.
2457 if (!isT2)
2458 MIB.addReg(0);
2459 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2460 MIB.cloneMergedMemRefs({Op0, Op1});
2461 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2462 ++NumSTRDFormed;
2463 }
2464 MBB->erase(Op0);
2465 MBB->erase(Op1);
2466
2467 if (!isT2) {
2468 // Add register allocation hints to form register pairs.
2469 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2470 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2471 }
2472 } else {
2473 for (unsigned i = 0; i != NumMove; ++i) {
2474 MachineInstr *Op = Ops.pop_back_val();
2475 if (isLd) {
2476 // Populate RegisterMap with all Registers defined by loads.
2477 Register Reg = Op->getOperand(0).getReg();
2478 RegisterMap[Reg];
2479 }
2480
2481 MBB->splice(InsertPos, MBB, Op);
2482 }
2483 }
2484
2485 NumLdStMoved += NumMove;
2486 RetVal = true;
2487 }
2488 }
2489 }
2490
2491 return RetVal;
2492}
2493
2494 static void forEachDbgRegOperand(MachineInstr *MI,
2495 std::function<void(MachineOperand &)> Fn) {
2496 if (MI->isNonListDebugValue()) {
2497 auto &Op = MI->getOperand(0);
2498 if (Op.isReg())
2499 Fn(Op);
2500 } else {
2501 for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2502 auto &Op = MI->getOperand(I);
2503 if (Op.isReg())
2504 Fn(Op);
2505 }
2506 }
2507}
2508
2509// Update the RegisterMap with the instruction that was moved because a
2510// DBG_VALUE_LIST may need to be moved again.
2511 static void updateRegisterMapForDbgValueListAfterMove(
2512 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap,
2513 MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2514
2515 forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2516 auto RegIt = RegisterMap.find(Op.getReg());
2517 if (RegIt == RegisterMap.end())
2518 return;
2519 auto &InstrVec = RegIt->getSecond();
2520 for (unsigned I = 0; I < InstrVec.size(); I++)
2521 if (InstrVec[I] == InstrToReplace)
2522 InstrVec[I] = DbgValueListInstr;
2523 });
2524}
2525
2526 static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI) {
2527 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2528 MI->getDebugLoc()->getInlinedAt());
2529 return DbgVar;
2530}
2531
2532bool
2533ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2534 bool RetVal = false;
2535
2536 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2537 using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
2538 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2539 using BaseVec = SmallVector<unsigned, 4>;
2540 Base2InstMap Base2LdsMap;
2541 Base2InstMap Base2StsMap;
2542 BaseVec LdBases;
2543 BaseVec StBases;
2544 // This map is used to track the relationship between the virtual
2545 // register that is the result of a load that is moved and the DBG_VALUE
2546 // MachineInstr pointer that uses that virtual register.
2547 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2548
2549 unsigned Loc = 0;
2550 MachineBasicBlock::iterator MBBI = MBB->begin();
2551 MachineBasicBlock::iterator E = MBB->end();
2552 while (MBBI != E) {
2553 for (; MBBI != E; ++MBBI) {
2554 MachineInstr &MI = *MBBI;
2555 if (MI.isCall() || MI.isTerminator()) {
2556 // Stop at barriers.
2557 ++MBBI;
2558 break;
2559 }
2560
2561 if (!MI.isDebugInstr())
2562 MI2LocMap[&MI] = ++Loc;
2563
2564 if (!isMemoryOp(MI))
2565 continue;
2566 Register PredReg;
2567 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2568 continue;
2569
2570 int Opc = MI.getOpcode();
2571 bool isLd = isLoadSingle(Opc);
2572 Register Base = MI.getOperand(1).getReg();
2573 int Offset = getMemoryOpOffset(MI);
2574 bool StopHere = false;
2575 auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
2576 MapIt BI = Base2Ops.find(Base);
2577 if (BI == Base2Ops.end()) {
2578 Base2Ops[Base].push_back(&MI);
2579 Bases.push_back(Base);
2580 return;
2581 }
2582 for (const MachineInstr *MI : BI->second) {
2583 if (Offset == getMemoryOpOffset(*MI)) {
2584 StopHere = true;
2585 break;
2586 }
2587 }
2588 if (!StopHere)
2589 BI->second.push_back(&MI);
2590 };
2591
2592 if (isLd)
2593 FindBases(Base2LdsMap, LdBases);
2594 else
2595 FindBases(Base2StsMap, StBases);
2596
2597 if (StopHere) {
2598 // Found a duplicate (a base+offset combination that's seen earlier).
2599 // Backtrack.
2600 --Loc;
2601 break;
2602 }
2603 }
2604
2605 // Re-schedule loads.
2606 for (unsigned Base : LdBases) {
2607 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2608 if (Lds.size() > 1)
2609 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2610 }
2611
2612 // Re-schedule stores.
2613 for (unsigned Base : StBases) {
2614 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2615 if (Sts.size() > 1)
2616 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2617 }
2618
2619 if (MBBI != E) {
2620 Base2LdsMap.clear();
2621 Base2StsMap.clear();
2622 LdBases.clear();
2623 StBases.clear();
2624 }
2625 }
2626
2627 // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2628 // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2629 // use-before-def, resulting in a loss of debug info.
2630
2631 // Example:
2632 // Before the Pre Register Allocation Load Store Pass
2633 // inst_a
2634 // %2 = ld ...
2635 // inst_b
2636 // DBG_VALUE %2, "x", ...
2637 // %3 = ld ...
2638
2639 // After the Pass:
2640 // inst_a
2641 // inst_b
2642 // DBG_VALUE %2, "x", ...
2643 // %2 = ld ...
2644 // %3 = ld ...
2645
2646 // The code below addresses this by moving the DBG_VALUE to the position
2647 // immediately after the load.
2648
2649 // Example:
2650 // After the code below:
2651 // inst_a
2652 // inst_b
2653 // %2 = ld ...
2654 // DBG_VALUE %2, "x", ...
2655 // %3 = ld ...
2656
2657 // The algorithm works in two phases: First RescheduleOps() populates the
2658 // RegisterMap with the moved registers as keys; no values are inserted
2659 // yet. In the next phase, every MachineInstr in a basic block is
2660 // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2661 // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2662 // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2663 // describes debug information for a variable that already exists in the
2664 // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2665 // be set to undef. If the current MachineInstr is a load that was moved,
2666 // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2667 // the load.
2668
2669 // To illustrate the above algorithm visually let's take this example.
2670
2671 // Before the Pre Register Allocation Load Store Pass:
2672 // %2 = ld ...
2673 // DBG_VALUE %2, A, .... # X
2674 // DBG_VALUE 0, A, ... # Y
2675 // %3 = ld ...
2676 // DBG_VALUE %3, A, ..., # Z
2677 // %4 = ld ...
2678
2679 // After Pre Register Allocation Load Store Pass:
2680 // DBG_VALUE %2, A, .... # X
2681 // DBG_VALUE 0, A, ... # Y
2682 // DBG_VALUE %3, A, ..., # Z
2683 // %2 = ld ...
2684 // %3 = ld ...
2685 // %4 = ld ...
2686
2687 // The algorithm below does the following:
2688
2689 // In the beginning, the RegisterMap will have been populated with the virtual
2690 // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2691 // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2692 // InstrMap {}
2693 // -> DBG_VALUE %2, A, .... # X
2694 // DBG_VALUE 0, A, ... # Y
2695 // DBG_VALUE %3, A, ..., # Z
2696 // %2 = ld ...
2697 // %3 = ld ...
2698 // %4 = ld ...
2699
2700 // After the first DBG_VALUE (denoted with an X) is processed, the
2701 // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2702 // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2703 // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2704 // DBG_VALUE %2, A, .... # X
2705 // -> DBG_VALUE 0, A, ... # Y
2706 // DBG_VALUE %3, A, ..., # Z
2707 // %2 = ld ...
2708 // %3 = ld ...
2709 // %4 = ld ...
2710
2711 // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2712 // to now hold Y for A, and the RegisterMap is also updated to remove X from
2713 // %2; this is because both X and Y describe the same debug variable A. X is
2714 // also updated to have a $noreg as the first operand.
2715 // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2716 // InstrMap = {X-> 2}
2717 // DBG_VALUE $noreg, A, .... # X
2718 // DBG_VALUE 0, A, ... # Y
2719 // -> DBG_VALUE %3, A, ..., # Z
2720 // %2 = ld ...
2721 // %3 = ld ...
2722 // %4 = ld ...
2723
2724 // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2725 // hold Z for A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2726 // is updated to have Z mapped to %3. This is again because Z describes the
2727 // debug variable A, Y is not updated to have $noreg as first operand because
2728 // its first operand is an immediate, not a register.
2729 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2730 // InstrMap = {X -> 2, Z -> 3}
2731 // DBG_VALUE $noreg, A, .... # X
2732 // DBG_VALUE 0, A, ... # Y
2733 // DBG_VALUE %3, A, ..., # Z
2734 // -> %2 = ld ...
2735 // %3 = ld ...
2736 // %4 = ld ...
2737
2738 // Nothing happens here since the RegisterMap for %2 contains no value.
2739 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2740 // InstrMap = {X -> 2, Z -> 3}
2741 // DBG_VALUE $noreg, A, .... # X
2742 // DBG_VALUE 0, A, ... # Y
2743 // DBG_VALUE %3, A, ..., # Z
2744 // %2 = ld ...
2745 // -> %3 = ld ...
2746 // %4 = ld ...
2747
2748 // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2749 // pointer Z is copied to come after the load for %3, the old Z's first
2750 // operand is changed to $noreg, and the basic block iterator is moved to
2751 // after the DBG_VALUE Z's new position.
2752 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2753 // InstrMap = {X -> 2, Z -> 3}
2754 // DBG_VALUE $noreg, A, .... # X
2755 // DBG_VALUE 0, A, ... # Y
2756 // DBG_VALUE $noreg, A, ..., # Old Z
2757 // %2 = ld ...
2758 // %3 = ld ...
2759 // DBG_VALUE %3, A, ..., # Z
2760 // -> %4 = ld ...
2761
2762 // Nothing happens for %4 and the algorithm exits having processed the entire
2763 // Basic Block.
2764 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2765 // InstrMap = {X -> 2, Z -> 3}
2766 // DBG_VALUE $noreg, A, .... # X
2767 // DBG_VALUE 0, A, ... # Y
2768 // DBG_VALUE $noreg, A, ..., # Old Z
2769 // %2 = ld ...
2770 // %3 = ld ...
2771 // DBG_VALUE %3, A, ..., # Z
2772 // %4 = ld ...
2773
2774 // This map is used to track the relationship between
2775 // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2776 // debug information for that Debug Variable.
2777 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2778 // This map is used to track the relationship between a DBG_VALUE or
2779 // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2780 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2781 for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2782 MachineInstr &MI = *MBBI;
2783
2784 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2785 auto RegIt = RegisterMap.find(Reg);
2786 if (RegIt == RegisterMap.end())
2787 return;
2788 auto &InstrVec = RegIt->getSecond();
2789 InstrVec.push_back(&MI);
2790 InstrMap[&MI].push_back(Reg);
2791 };
2792
2793 if (MI.isDebugValue()) {
2794 assert(MI.getDebugVariable() &&
2795 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2796
2797 auto DbgVar = createDebugVariableFromMachineInstr(&MI);
2798 // If the first operand is a register and it exists in the RegisterMap, we
2799 // know this is a DBG_VALUE that uses the result of a load that was moved,
2800 // and is therefore a candidate to also be moved, add it to the
2801 // RegisterMap and InstrMap.
2802 forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2803 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2804 });
2805
2806 // If the current DBG_VALUE describes the same variable as one of the
2807 // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2808 // undef. Moving one DBG_VALUE past another would result in the variable's
2809 // value going back in time when stepping through the block in the
2810 // debugger.
2811 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2812 if (InstrIt != DbgValueSinkCandidates.end()) {
2813 auto *Instr = InstrIt->getSecond();
2814 auto RegIt = InstrMap.find(Instr);
2815 if (RegIt != InstrMap.end()) {
2816 const auto &RegVec = RegIt->getSecond();
2817 // For every Register in the RegVec, remove the MachineInstr in the
2818 // RegisterMap that describes the DbgVar.
2819 for (auto &Reg : RegVec) {
2820 auto RegIt = RegisterMap.find(Reg);
2821 if (RegIt == RegisterMap.end())
2822 continue;
2823 auto &InstrVec = RegIt->getSecond();
2824 auto IsDbgVar = [&](MachineInstr *I) -> bool {
2825 auto Var = createDebugVariableFromMachineInstr(I);
2826 return Var == DbgVar;
2827 };
2828
2829 llvm::erase_if(InstrVec, IsDbgVar);
2830 }
2831 forEachDbgRegOperand(Instr,
2832 [&](MachineOperand &Op) { Op.setReg(0); });
2833 }
2834 }
2835 DbgValueSinkCandidates[DbgVar] = &MI;
2836 } else {
2837 // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2838 // then move that DBG_VALUE to below the load.
2839 auto Opc = MI.getOpcode();
2840 if (!isLoadSingle(Opc))
2841 continue;
2842 auto Reg = MI.getOperand(0).getReg();
2843 auto RegIt = RegisterMap.find(Reg);
2844 if (RegIt == RegisterMap.end())
2845 continue;
2846 auto &DbgInstrVec = RegIt->getSecond();
2847 if (!DbgInstrVec.size())
2848 continue;
2849 for (auto *DbgInstr : DbgInstrVec) {
2850 MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2851 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2852 MBB->insert(InsertPos, ClonedMI);
2853 MBBI++;
2854 // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2855 // that was moved.
2856 auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2857 auto DbgIt = DbgValueSinkCandidates.find(DbgVar);
2858 // If the instruction is a DBG_VALUE_LIST, it may have already been
2859 // erased from the DbgValueSinkCandidates. Only erase if it exists in
2860 // the DbgValueSinkCandidates.
2861 if (DbgIt != DbgValueSinkCandidates.end())
2862 DbgValueSinkCandidates.erase(DbgIt);
2863 // Zero out original dbg instr
2864 forEachDbgRegOperand(DbgInstr,
2865 [&](MachineOperand &Op) { Op.setReg(0); });
2866 // Update RegisterMap with ClonedMI because it might have to be moved
2867 // again.
2868 if (DbgInstr->isDebugValueList())
2869 updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2870 DbgInstr);
2871 }
2872 }
2873 }
2874 return RetVal;
2875}
2876
2877 // Get the base register operand index from the memory access MachineInstr if
2878 // we should attempt to distribute postinc on it. Return -1 if it is not a
2879 // valid instruction type. If an index is returned, the instruction is assumed
2880 // to use an r+i addressing mode, and getBaseOperandIndex() + 1 is the Offset index.
2881 static int getBaseOperandIndex(MachineInstr &MI) {
2882 switch (MI.getOpcode()) {
2883 case ARM::MVE_VLDRBS16:
2884 case ARM::MVE_VLDRBS32:
2885 case ARM::MVE_VLDRBU16:
2886 case ARM::MVE_VLDRBU32:
2887 case ARM::MVE_VLDRHS32:
2888 case ARM::MVE_VLDRHU32:
2889 case ARM::MVE_VLDRBU8:
2890 case ARM::MVE_VLDRHU16:
2891 case ARM::MVE_VLDRWU32:
2892 case ARM::MVE_VSTRB16:
2893 case ARM::MVE_VSTRB32:
2894 case ARM::MVE_VSTRH32:
2895 case ARM::MVE_VSTRBU8:
2896 case ARM::MVE_VSTRHU16:
2897 case ARM::MVE_VSTRWU32:
2898 case ARM::t2LDRHi8:
2899 case ARM::t2LDRHi12:
2900 case ARM::t2LDRSHi8:
2901 case ARM::t2LDRSHi12:
2902 case ARM::t2LDRBi8:
2903 case ARM::t2LDRBi12:
2904 case ARM::t2LDRSBi8:
2905 case ARM::t2LDRSBi12:
2906 case ARM::t2STRBi8:
2907 case ARM::t2STRBi12:
2908 case ARM::t2STRHi8:
2909 case ARM::t2STRHi12:
2910 return 1;
2911 case ARM::MVE_VLDRBS16_post:
2912 case ARM::MVE_VLDRBS32_post:
2913 case ARM::MVE_VLDRBU16_post:
2914 case ARM::MVE_VLDRBU32_post:
2915 case ARM::MVE_VLDRHS32_post:
2916 case ARM::MVE_VLDRHU32_post:
2917 case ARM::MVE_VLDRBU8_post:
2918 case ARM::MVE_VLDRHU16_post:
2919 case ARM::MVE_VLDRWU32_post:
2920 case ARM::MVE_VSTRB16_post:
2921 case ARM::MVE_VSTRB32_post:
2922 case ARM::MVE_VSTRH32_post:
2923 case ARM::MVE_VSTRBU8_post:
2924 case ARM::MVE_VSTRHU16_post:
2925 case ARM::MVE_VSTRWU32_post:
2926 case ARM::MVE_VLDRBS16_pre:
2927 case ARM::MVE_VLDRBS32_pre:
2928 case ARM::MVE_VLDRBU16_pre:
2929 case ARM::MVE_VLDRBU32_pre:
2930 case ARM::MVE_VLDRHS32_pre:
2931 case ARM::MVE_VLDRHU32_pre:
2932 case ARM::MVE_VLDRBU8_pre:
2933 case ARM::MVE_VLDRHU16_pre:
2934 case ARM::MVE_VLDRWU32_pre:
2935 case ARM::MVE_VSTRB16_pre:
2936 case ARM::MVE_VSTRB32_pre:
2937 case ARM::MVE_VSTRH32_pre:
2938 case ARM::MVE_VSTRBU8_pre:
2939 case ARM::MVE_VSTRHU16_pre:
2940 case ARM::MVE_VSTRWU32_pre:
2941 return 2;
2942 }
2943 return -1;
2944}
2945
2946 static bool isPostIndex(MachineInstr &MI) {
2947 switch (MI.getOpcode()) {
2948 case ARM::MVE_VLDRBS16_post:
2949 case ARM::MVE_VLDRBS32_post:
2950 case ARM::MVE_VLDRBU16_post:
2951 case ARM::MVE_VLDRBU32_post:
2952 case ARM::MVE_VLDRHS32_post:
2953 case ARM::MVE_VLDRHU32_post:
2954 case ARM::MVE_VLDRBU8_post:
2955 case ARM::MVE_VLDRHU16_post:
2956 case ARM::MVE_VLDRWU32_post:
2957 case ARM::MVE_VSTRB16_post:
2958 case ARM::MVE_VSTRB32_post:
2959 case ARM::MVE_VSTRH32_post:
2960 case ARM::MVE_VSTRBU8_post:
2961 case ARM::MVE_VSTRHU16_post:
2962 case ARM::MVE_VSTRWU32_post:
2963 return true;
2964 }
2965 return false;
2966}
2967
2968 static bool isPreIndex(MachineInstr &MI) {
2969 switch (MI.getOpcode()) {
2970 case ARM::MVE_VLDRBS16_pre:
2971 case ARM::MVE_VLDRBS32_pre:
2972 case ARM::MVE_VLDRBU16_pre:
2973 case ARM::MVE_VLDRBU32_pre:
2974 case ARM::MVE_VLDRHS32_pre:
2975 case ARM::MVE_VLDRHU32_pre:
2976 case ARM::MVE_VLDRBU8_pre:
2977 case ARM::MVE_VLDRHU16_pre:
2978 case ARM::MVE_VLDRWU32_pre:
2979 case ARM::MVE_VSTRB16_pre:
2980 case ARM::MVE_VSTRB32_pre:
2981 case ARM::MVE_VSTRH32_pre:
2982 case ARM::MVE_VSTRBU8_pre:
2983 case ARM::MVE_VSTRHU16_pre:
2984 case ARM::MVE_VSTRWU32_pre:
2985 return true;
2986 }
2987 return false;
2988}
2989
2990 // Given a memory access Opcode, check that the given Imm would be a valid
2991 // Offset for this instruction (same as isLegalAddressImm), or whether the
2992 // instruction could be easily converted to one where it is valid, for example
2993 // converting t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction
2994 // with AdjustBaseAndOffset below.
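// E.g. (a sketch): Imm == -4 is not encodable by t2LDRBi12, but the equivalent
// t2LDRBi8 accepts it; CodesizeEstimate records that the converted form can no
// longer shrink to a 16-bit encoding under minsize.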
2995static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
2996 const TargetInstrInfo *TII,
2997 int &CodesizeEstimate) {
2998 if (isLegalAddressImm(Opcode, Imm, TII))
2999 return true;
3000
3001 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
3002 const MCInstrDesc &Desc = TII->get(Opcode);
3003 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
3004 switch (AddrMode) {
3005 case ARMII::AddrModeT2_i12:
3006 CodesizeEstimate += 1;
3007 return Imm < 0 && -Imm < ((1 << 8) * 1);
3008 }
3009 return false;
3010}
3011
3012 // Given an MI, adjust its address BaseReg to use NewBaseReg and adjust its
3013 // address offset by -Offset. This can either happen in-place or be a
3014 // replacement as MI is converted to another instruction type.
3015 static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
3016 int Offset, const TargetInstrInfo *TII,
3017 const TargetRegisterInfo *TRI) {
3018 // Set the Base reg
3019 unsigned BaseOp = getBaseOperandIndex(*MI);
3020 MI->getOperand(BaseOp).setReg(NewBaseReg);
3021 // and constrain the reg class to that required by the instruction.
3022 MachineFunction *MF = MI->getMF();
3023 MachineRegisterInfo &MRI = MF->getRegInfo();
3024 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3025 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
3026 MRI.constrainRegClass(NewBaseReg, TRC);
3027
3028 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3029 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3030 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3031 else {
3032 unsigned ConvOpcode;
3033 switch (MI->getOpcode()) {
3034 case ARM::t2LDRHi12:
3035 ConvOpcode = ARM::t2LDRHi8;
3036 break;
3037 case ARM::t2LDRSHi12:
3038 ConvOpcode = ARM::t2LDRSHi8;
3039 break;
3040 case ARM::t2LDRBi12:
3041 ConvOpcode = ARM::t2LDRBi8;
3042 break;
3043 case ARM::t2LDRSBi12:
3044 ConvOpcode = ARM::t2LDRSBi8;
3045 break;
3046 case ARM::t2STRHi12:
3047 ConvOpcode = ARM::t2STRHi8;
3048 break;
3049 case ARM::t2STRBi12:
3050 ConvOpcode = ARM::t2STRBi8;
3051 break;
3052 default:
3053 llvm_unreachable("Unhandled convertable opcode");
3054 }
3055 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3056 "Illegal Address Immediate after convert!");
3057
3058 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3059 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3060 .add(MI->getOperand(0))
3061 .add(MI->getOperand(1))
3062 .addImm(OldOffset - Offset)
3063 .add(MI->getOperand(3))
3064 .add(MI->getOperand(4))
3065 .cloneMemRefs(*MI);
3066 MI->eraseFromParent();
3067 }
3068}
3069
3070 static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
3071 Register NewReg,
3072 const TargetInstrInfo *TII,
3073 const TargetRegisterInfo *TRI) {
3074 MachineFunction *MF = MI->getMF();
3075 MachineRegisterInfo &MRI = MF->getRegInfo();
3076
3077 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3078 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3079
3080 const MCInstrDesc &MCID = TII->get(NewOpcode);
3081 // Constrain the def register class
3082 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
3083 MRI.constrainRegClass(NewReg, TRC);
3084 // And do the same for the base operand
3085 TRC = TII->getRegClass(MCID, 2, TRI, *MF);
3086 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3087
3088 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3089 switch (AddrMode) {
3090 case ARMII::AddrModeT2_i7:
3091 case ARMII::AddrModeT2_i7s2:
3092 case ARMII::AddrModeT2_i7s4:
3093 // Any MVE load/store
3094 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3095 .addReg(NewReg, RegState::Define)
3096 .add(MI->getOperand(0))
3097 .add(MI->getOperand(1))
3098 .addImm(Offset)
3099 .add(MI->getOperand(3))
3100 .add(MI->getOperand(4))
3101 .add(MI->getOperand(5))
3102 .cloneMemRefs(*MI);
3103 case ARMII::AddrModeT2_i8:
3104 if (MI->mayLoad()) {
3105 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3106 .add(MI->getOperand(0))
3107 .addReg(NewReg, RegState::Define)
3108 .add(MI->getOperand(1))
3109 .addImm(Offset)
3110 .add(MI->getOperand(3))
3111 .add(MI->getOperand(4))
3112 .cloneMemRefs(*MI);
3113 } else {
3114 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3115 .addReg(NewReg, RegState::Define)
3116 .add(MI->getOperand(0))
3117 .add(MI->getOperand(1))
3118 .addImm(Offset)
3119 .add(MI->getOperand(3))
3120 .add(MI->getOperand(4))
3121 .cloneMemRefs(*MI);
3122 }
3123 default:
3124 llvm_unreachable("Unhandled createPostIncLoadStore");
3125 }
3126}
3127
3128 // Given a Base Register, optimise the load/store uses to attempt to create
3129 // more post-inc accesses and fewer register moves. We do this by taking zero
3130 // offset loads/stores with an add, and converting them to a postinc load/store
3131 // of the same type. Any subsequent accesses will be adjusted to use and account
3132 // for the post-inc value.
3133// For example:
3134// LDR #0 LDR_POSTINC #16
3135// LDR #4 LDR #-12
3136// LDR #8 LDR #-8
3137// LDR #12 LDR #-4
3138// ADD #16
3139//
3140// At the same time if we do not find an increment but do find an existing
3141// pre/post inc instruction, we can still adjust the offsets of subsequent
3142// instructions to save the register move that would otherwise be needed for the
3143// in-place increment.
3144bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3145 // We are looking for:
3146 // One zero offset load/store that can become postinc
3147 MachineInstr *BaseAccess = nullptr;
3148 MachineInstr *PrePostInc = nullptr;
3149 // An increment that can be folded in
3150 MachineInstr *Increment = nullptr;
3151 // Other accesses after BaseAccess that will need to be updated to use the
3152 // postinc value.
3153 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3154 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3155 if (!Increment && getAddSubImmediate(Use) != 0) {
3156 Increment = &Use;
3157 continue;
3158 }
3159
3160 int BaseOp = getBaseOperandIndex(Use);
3161 if (BaseOp == -1)
3162 return false;
3163
3164 if (!Use.getOperand(BaseOp).isReg() ||
3165 Use.getOperand(BaseOp).getReg() != Base)
3166 return false;
3167 if (isPreIndex(Use) || isPostIndex(Use))
3168 PrePostInc = &Use;
3169 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3170 BaseAccess = &Use;
3171 else
3172 OtherAccesses.insert(&Use);
3173 }
3174
3175 int IncrementOffset;
3176 Register NewBaseReg;
3177 if (BaseAccess && Increment) {
3178 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3179 return false;
3180 Register PredReg;
3181 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3182 getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
3183 return false;
3184
3185 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3186 << Base.virtRegIndex() << "\n");
3187
3188 // Make sure that Increment has no uses before BaseAccess that are not PHI
3189 // uses.
3190 for (MachineInstr &Use :
3191 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3192 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3193 !DT->dominates(BaseAccess, &Use))) {
3194 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3195 return false;
3196 }
3197 }
3198
3199 // Make sure that Increment can be folded into Base
3200 IncrementOffset = getAddSubImmediate(*Increment);
3201 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3202 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3203 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3204 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3205 return false;
3206 }
3207 }
3208 else if (PrePostInc) {
3209 // If we already have a pre/post index load/store then set BaseAccess,
3210 // IncrementOffset and NewBaseReg to the values it already produces,
3211 // allowing us to update any subsequent uses of the BaseOp reg with the
3212 // incremented value.
3213 if (Increment)
3214 return false;
3215
3216 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3217 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3218 int BaseOp = getBaseOperandIndex(*PrePostInc);
3219 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3220 BaseAccess = PrePostInc;
3221 NewBaseReg = PrePostInc->getOperand(0).getReg();
3222 }
3223 else
3224 return false;
3225
3226 // And make sure that the negative value of increment can be added to all
3227 // other offsets after the BaseAccess. We rely on either
3228 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3229 // to keep things simple.
3230 // This also adds a simple codesize metric, to detect if an instruction (like
3231 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3232 // cannot because it is converted to something else (t2LDRBi8). We start this
3233 // at -1 for the gain from removing the increment.
3234 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3235 int CodesizeEstimate = -1;
3236 for (auto *Use : OtherAccesses) {
3237 if (DT->dominates(BaseAccess, Use)) {
3238 SuccessorAccesses.insert(Use);
3239 unsigned BaseOp = getBaseOperandIndex(*Use);
3240 if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
3241 Use->getOperand(BaseOp + 1).getImm() -
3242 IncrementOffset,
3243 TII, CodesizeEstimate)) {
3244 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3245 return false;
3246 }
3247 } else if (!DT->dominates(Use, BaseAccess)) {
3248 LLVM_DEBUG(
3249 dbgs() << " Unknown dominance relation between Base and Use\n");
3250 return false;
3251 }
3252 }
3253 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3254 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3255 return false;
3256 }
3257
3258 if (!PrePostInc) {
3259 // Replace BaseAccess with a post inc
3260 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3261 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3262 NewBaseReg = Increment->getOperand(0).getReg();
3263 MachineInstr *BaseAccessPost =
3264 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3265 BaseAccess->eraseFromParent();
3266 Increment->eraseFromParent();
3267 (void)BaseAccessPost;
3268 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3269 }
3270
3271 for (auto *Use : SuccessorAccesses) {
3272 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3273 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3274 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3275 }
3276
3277 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3278 // remain.
3279 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3280 Op.setIsKill(false);
3281 return true;
3282}
3283
3284bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3285 bool Changed = false;
3286 SmallSetVector<Register, 4> Visited;
3287 for (auto &MBB : *MF) {
3288 for (auto &MI : MBB) {
3289 int BaseOp = getBaseOperandIndex(MI);
3290 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3291 continue;
3292
3293 Register Base = MI.getOperand(BaseOp).getReg();
3294 if (!Base.isVirtual() || Visited.count(Base))
3295 continue;
3296
3297 Visited.insert(Base);
3298 }
3299 }
3300
3301 for (auto Base : Visited)
3302 Changed |= DistributeIncrements(Base);
3303
3304 return Changed;
3305}
3306
3307/// Returns an instance of the load / store optimization pass.
3308 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
3309 if (PreAlloc)
3310 return new ARMPreAllocLoadStoreOpt();
3311 return new ARMLoadStoreOpt();
3312}
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:815
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_type size() const
Definition: SmallSet.h:161
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:389
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5266
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Define
Register definition.
@ Kill
The last use of a register.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1446
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
unsigned getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
Definition: VE.h:376
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
int getAddSubImmediate(MachineInstr &MI)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.