LLVM 20.0.0git
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a pass that performs load / store related peephole
10/// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMISelLowering.h"
19#include "ARMSubtarget.h"
22#include "Utils/ARMBaseInfo.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/Type.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
60#include "llvm/Support/Debug.h"
63#include <cassert>
64#include <cstddef>
65#include <cstdlib>
66#include <iterator>
67#include <limits>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");
75STATISTIC(NumSTMGened , "Number of stm instructions generated");
76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
86/// This switch disables formation of double/multi instructions that could
87/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
88/// disabled. This can be used to create libraries that are robust even when
89/// users provoke undefined behaviour by supplying misaligned pointers.
90/// \see mayCombineMisaligned()
91static cl::opt<bool>
92AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
94
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
97namespace {
98
99 /// Post-register-allocation pass that combines load / store instructions to
100 /// form ldm / stm instructions.
101 struct ARMLoadStoreOpt : public MachineFunctionPass {
102 static char ID;
103
104 const MachineFunction *MF;
105 const TargetInstrInfo *TII;
106 const TargetRegisterInfo *TRI;
107 const ARMSubtarget *STI;
108 const TargetLowering *TL;
109 ARMFunctionInfo *AFI;
110 LiveRegUnits LiveRegs;
111 RegisterClassInfo RegClassInfo;
113 bool LiveRegsValid;
114 bool RegClassInfoValid;
115 bool isThumb1, isThumb2;
116
117 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
118
119 bool runOnMachineFunction(MachineFunction &Fn) override;
120
123 MachineFunctionProperties::Property::NoVRegs);
124 }
125
126 StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
127
128 private:
129 /// A set of load/store MachineInstrs with same base register sorted by
130 /// offset.
131 struct MemOpQueueEntry {
133 int Offset; ///< Load/Store offset.
134 unsigned Position; ///< Position as counted from end of basic block.
135
136 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
137 : MI(&MI), Offset(Offset), Position(Position) {}
138 };
139 using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
140
141 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
142 /// merged into a LDM/STM.
143 struct MergeCandidate {
144 /// List of instructions ordered by load/store offset.
146
147 /// Index in Instrs of the instruction being latest in the schedule.
148 unsigned LatestMIIdx;
149
150 /// Index in Instrs of the instruction being earliest in the schedule.
151 unsigned EarliestMIIdx;
152
153 /// Index into the basic block where the merged instruction will be
154 /// inserted. (See MemOpQueueEntry.Position)
155 unsigned InsertPos;
156
157 /// Whether the instructions can be merged into a ldm/stm instruction.
158 bool CanMergeToLSMulti;
159
160 /// Whether the instructions can be merged into a ldrd/strd instruction.
161 bool CanMergeToLSDouble;
162 };
165 SmallVector<MachineInstr*,4> MergeBaseCandidates;
166
167 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
169 unsigned findFreeReg(const TargetRegisterClass &RegClass);
170 void UpdateBaseRegUses(MachineBasicBlock &MBB,
172 unsigned Base, unsigned WordOffset,
173 ARMCC::CondCodes Pred, unsigned PredReg);
174 MachineInstr *CreateLoadStoreMulti(
176 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
177 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
178 ArrayRef<std::pair<unsigned, bool>> Regs,
180 MachineInstr *CreateLoadStoreDouble(
182 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
183 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
184 ArrayRef<std::pair<unsigned, bool>> Regs,
185 ArrayRef<MachineInstr*> Instrs) const;
186 void FormCandidates(const MemOpQueue &MemOps);
187 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
188 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
190 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
191 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
192 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
193 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
194 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
195 bool CombineMovBx(MachineBasicBlock &MBB);
196 };
197
198} // end anonymous namespace
199
200char ARMLoadStoreOpt::ID = 0;
201
202INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
203 false)
204
205static bool definesCPSR(const MachineInstr &MI) {
206 for (const auto &MO : MI.operands()) {
207 if (!MO.isReg())
208 continue;
209 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
210 // If the instruction has live CPSR def, then it's not safe to fold it
211 // into load / store.
212 return true;
213 }
214
215 return false;
216}
217
219 unsigned Opcode = MI.getOpcode();
220 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
221 unsigned NumOperands = MI.getDesc().getNumOperands();
222 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
223
224 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
225 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
226 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
227 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
228 return OffField;
229
230 // Thumb1 immediate offsets are scaled by 4
231 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
232 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
233 return OffField * 4;
234
235 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
236 : ARM_AM::getAM5Offset(OffField) * 4;
237 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
238 : ARM_AM::getAM5Op(OffField);
239
240 if (Op == ARM_AM::sub)
241 return -Offset;
242
243 return Offset;
244}
245
247 return MI.getOperand(1);
248}
249
251 return MI.getOperand(0);
252}
253
254static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
255 switch (Opcode) {
256 default: llvm_unreachable("Unhandled opcode!");
257 case ARM::LDRi12:
258 ++NumLDMGened;
259 switch (Mode) {
260 default: llvm_unreachable("Unhandled submode!");
261 case ARM_AM::ia: return ARM::LDMIA;
262 case ARM_AM::da: return ARM::LDMDA;
263 case ARM_AM::db: return ARM::LDMDB;
264 case ARM_AM::ib: return ARM::LDMIB;
265 }
266 case ARM::STRi12:
267 ++NumSTMGened;
268 switch (Mode) {
269 default: llvm_unreachable("Unhandled submode!");
270 case ARM_AM::ia: return ARM::STMIA;
271 case ARM_AM::da: return ARM::STMDA;
272 case ARM_AM::db: return ARM::STMDB;
273 case ARM_AM::ib: return ARM::STMIB;
274 }
275 case ARM::tLDRi:
276 case ARM::tLDRspi:
277 // tLDMIA is writeback-only - unless the base register is in the input
278 // reglist.
279 ++NumLDMGened;
280 switch (Mode) {
281 default: llvm_unreachable("Unhandled submode!");
282 case ARM_AM::ia: return ARM::tLDMIA;
283 }
284 case ARM::tSTRi:
285 case ARM::tSTRspi:
286 // There is no non-writeback tSTMIA either.
287 ++NumSTMGened;
288 switch (Mode) {
289 default: llvm_unreachable("Unhandled submode!");
290 case ARM_AM::ia: return ARM::tSTMIA_UPD;
291 }
292 case ARM::t2LDRi8:
293 case ARM::t2LDRi12:
294 ++NumLDMGened;
295 switch (Mode) {
296 default: llvm_unreachable("Unhandled submode!");
297 case ARM_AM::ia: return ARM::t2LDMIA;
298 case ARM_AM::db: return ARM::t2LDMDB;
299 }
300 case ARM::t2STRi8:
301 case ARM::t2STRi12:
302 ++NumSTMGened;
303 switch (Mode) {
304 default: llvm_unreachable("Unhandled submode!");
305 case ARM_AM::ia: return ARM::t2STMIA;
306 case ARM_AM::db: return ARM::t2STMDB;
307 }
308 case ARM::VLDRS:
309 ++NumVLDMGened;
310 switch (Mode) {
311 default: llvm_unreachable("Unhandled submode!");
312 case ARM_AM::ia: return ARM::VLDMSIA;
313 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
314 }
315 case ARM::VSTRS:
316 ++NumVSTMGened;
317 switch (Mode) {
318 default: llvm_unreachable("Unhandled submode!");
319 case ARM_AM::ia: return ARM::VSTMSIA;
320 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
321 }
322 case ARM::VLDRD:
323 ++NumVLDMGened;
324 switch (Mode) {
325 default: llvm_unreachable("Unhandled submode!");
326 case ARM_AM::ia: return ARM::VLDMDIA;
327 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
328 }
329 case ARM::VSTRD:
330 ++NumVSTMGened;
331 switch (Mode) {
332 default: llvm_unreachable("Unhandled submode!");
333 case ARM_AM::ia: return ARM::VSTMDIA;
334 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
335 }
336 }
337}
338
340 switch (Opcode) {
341 default: llvm_unreachable("Unhandled opcode!");
342 case ARM::LDMIA_RET:
343 case ARM::LDMIA:
344 case ARM::LDMIA_UPD:
345 case ARM::STMIA:
346 case ARM::STMIA_UPD:
347 case ARM::tLDMIA:
348 case ARM::tLDMIA_UPD:
349 case ARM::tSTMIA_UPD:
350 case ARM::t2LDMIA_RET:
351 case ARM::t2LDMIA:
352 case ARM::t2LDMIA_UPD:
353 case ARM::t2STMIA:
354 case ARM::t2STMIA_UPD:
355 case ARM::VLDMSIA:
356 case ARM::VLDMSIA_UPD:
357 case ARM::VSTMSIA:
358 case ARM::VSTMSIA_UPD:
359 case ARM::VLDMDIA:
360 case ARM::VLDMDIA_UPD:
361 case ARM::VSTMDIA:
362 case ARM::VSTMDIA_UPD:
363 return ARM_AM::ia;
364
365 case ARM::LDMDA:
366 case ARM::LDMDA_UPD:
367 case ARM::STMDA:
368 case ARM::STMDA_UPD:
369 return ARM_AM::da;
370
371 case ARM::LDMDB:
372 case ARM::LDMDB_UPD:
373 case ARM::STMDB:
374 case ARM::STMDB_UPD:
375 case ARM::t2LDMDB:
376 case ARM::t2LDMDB_UPD:
377 case ARM::t2STMDB:
378 case ARM::t2STMDB_UPD:
379 case ARM::VLDMSDB_UPD:
380 case ARM::VSTMSDB_UPD:
381 case ARM::VLDMDDB_UPD:
382 case ARM::VSTMDDB_UPD:
383 return ARM_AM::db;
384
385 case ARM::LDMIB:
386 case ARM::LDMIB_UPD:
387 case ARM::STMIB:
388 case ARM::STMIB_UPD:
389 return ARM_AM::ib;
390 }
391}
392
393static bool isT1i32Load(unsigned Opc) {
394 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
395}
396
397static bool isT2i32Load(unsigned Opc) {
398 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
399}
400
401static bool isi32Load(unsigned Opc) {
402 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
403}
404
405static bool isT1i32Store(unsigned Opc) {
406 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
407}
408
409static bool isT2i32Store(unsigned Opc) {
410 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
411}
412
413static bool isi32Store(unsigned Opc) {
414 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
415}
416
417static bool isLoadSingle(unsigned Opc) {
418 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
419}
420
421static unsigned getImmScale(unsigned Opc) {
422 switch (Opc) {
423 default: llvm_unreachable("Unhandled opcode!");
424 case ARM::tLDRi:
425 case ARM::tSTRi:
426 case ARM::tLDRspi:
427 case ARM::tSTRspi:
428 return 1;
429 case ARM::tLDRHi:
430 case ARM::tSTRHi:
431 return 2;
432 case ARM::tLDRBi:
433 case ARM::tSTRBi:
434 return 4;
435 }
436}
437
439 switch (MI->getOpcode()) {
440 default: return 0;
441 case ARM::LDRi12:
442 case ARM::STRi12:
443 case ARM::tLDRi:
444 case ARM::tSTRi:
445 case ARM::tLDRspi:
446 case ARM::tSTRspi:
447 case ARM::t2LDRi8:
448 case ARM::t2LDRi12:
449 case ARM::t2STRi8:
450 case ARM::t2STRi12:
451 case ARM::VLDRS:
452 case ARM::VSTRS:
453 return 4;
454 case ARM::VLDRD:
455 case ARM::VSTRD:
456 return 8;
457 case ARM::LDMIA:
458 case ARM::LDMDA:
459 case ARM::LDMDB:
460 case ARM::LDMIB:
461 case ARM::STMIA:
462 case ARM::STMDA:
463 case ARM::STMDB:
464 case ARM::STMIB:
465 case ARM::tLDMIA:
466 case ARM::tLDMIA_UPD:
467 case ARM::tSTMIA_UPD:
468 case ARM::t2LDMIA:
469 case ARM::t2LDMDB:
470 case ARM::t2STMIA:
471 case ARM::t2STMDB:
472 case ARM::VLDMSIA:
473 case ARM::VSTMSIA:
474 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
475 case ARM::VLDMDIA:
476 case ARM::VSTMDIA:
477 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
478 }
479}
480
481/// Update future uses of the base register with the offset introduced
482/// due to writeback. This function only works on Thumb1.
483void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
485 const DebugLoc &DL, unsigned Base,
486 unsigned WordOffset,
487 ARMCC::CondCodes Pred,
488 unsigned PredReg) {
489 assert(isThumb1 && "Can only update base register uses for Thumb1!");
490 // Start updating any instructions with immediate offsets. Insert a SUB before
491 // the first non-updateable instruction (if any).
492 for (; MBBI != MBB.end(); ++MBBI) {
493 bool InsertSub = false;
494 unsigned Opc = MBBI->getOpcode();
495
496 if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
497 int Offset;
498 bool IsLoad =
499 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
500 bool IsStore =
501 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
502
503 if (IsLoad || IsStore) {
504 // Loads and stores with immediate offsets can be updated, but only if
505 // the new offset isn't negative.
506 // The MachineOperand containing the offset immediate is the last one
507 // before predicates.
508 MachineOperand &MO =
509 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
510 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
511 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
512
513 // If storing the base register, it needs to be reset first.
514 Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
515
516 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
517 MO.setImm(Offset);
518 else
519 InsertSub = true;
520 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
521 !definesCPSR(*MBBI)) {
522 // SUBS/ADDS using this register, with a dead def of the CPSR.
523 // Merge it with the update; if the merged offset is too large,
524 // insert a new sub instead.
525 MachineOperand &MO =
526 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
527 Offset = (Opc == ARM::tSUBi8) ?
528 MO.getImm() + WordOffset * 4 :
529 MO.getImm() - WordOffset * 4 ;
530 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
531 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
532 // Offset == 0.
533 MO.setImm(Offset);
534 // The base register has now been reset, so exit early.
535 return;
536 } else {
537 InsertSub = true;
538 }
539 } else {
540 // Can't update the instruction.
541 InsertSub = true;
542 }
543 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
544 // Since SUBS sets the condition flags, we can't place the base reset
545 // after an instruction that has a live CPSR def.
546 // The base register might also contain an argument for a function call.
547 InsertSub = true;
548 }
549
550 if (InsertSub) {
551 // An instruction above couldn't be updated, so insert a sub.
552 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
553 .add(t1CondCodeOp(true))
554 .addReg(Base)
555 .addImm(WordOffset * 4)
556 .addImm(Pred)
557 .addReg(PredReg);
558 return;
559 }
560
561 if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
562 MBBI->definesRegister(Base, /*TRI=*/nullptr))
563 // Register got killed. Stop updating.
564 return;
565 }
566
567 // End of block was reached.
568 if (!MBB.succ_empty()) {
569 // FIXME: Because of a bug, live registers are sometimes missing from
570 // the successor blocks' live-in sets. This means we can't trust that
571 // information and *always* have to reset at the end of a block.
572 // See PR21029.
573 if (MBBI != MBB.end()) --MBBI;
574 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
575 .add(t1CondCodeOp(true))
576 .addReg(Base)
577 .addImm(WordOffset * 4)
578 .addImm(Pred)
579 .addReg(PredReg);
580 }
581}
582
583/// Return the first register of class \p RegClass that is not in \p Regs.
584unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
585 if (!RegClassInfoValid) {
586 RegClassInfo.runOnMachineFunction(*MF);
587 RegClassInfoValid = true;
588 }
589
590 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
591 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
592 return Reg;
593 return 0;
594}
595
596/// Compute live registers just before instruction \p Before (in normal schedule
597/// direction). Computes backwards so multiple queries in the same block must
598/// come in reverse order.
599void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
601 // Initialize if we never queried in this block.
602 if (!LiveRegsValid) {
603 LiveRegs.init(*TRI);
604 LiveRegs.addLiveOuts(MBB);
605 LiveRegPos = MBB.end();
606 LiveRegsValid = true;
607 }
608 // Move backward just before the "Before" position.
609 while (LiveRegPos != Before) {
610 --LiveRegPos;
611 LiveRegs.stepBackward(*LiveRegPos);
612 }
613}
614
615static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
616 unsigned Reg) {
617 for (const std::pair<unsigned, bool> &R : Regs)
618 if (R.first == Reg)
619 return true;
620 return false;
621}
622
623/// Create and insert a LDM or STM with Base as base register and registers in
624/// Regs as the register operands that would be loaded / stored. It returns
625/// the newly created instruction, or nullptr if the merge was not performed.
626MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
628 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
629 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
630 ArrayRef<std::pair<unsigned, bool>> Regs,
632 unsigned NumRegs = Regs.size();
633 assert(NumRegs > 1);
634
635 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
636 // Compute liveness information for that register to make the decision.
637 bool SafeToClobberCPSR = !isThumb1 ||
638 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
640
641 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
642
643 // Exception: If the base register is in the input reglist, Thumb1 LDM is
644 // non-writeback.
645 // It's also not possible to merge an STR of the base register in Thumb1.
646 if (isThumb1 && ContainsReg(Regs, Base)) {
647 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
648 if (Opcode == ARM::tLDRi)
649 Writeback = false;
650 else if (Opcode == ARM::tSTRi)
651 return nullptr;
652 }
653
655 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
656 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
657 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
658
659 if (Offset == 4 && haveIBAndDA) {
661 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
663 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
664 // VLDM/VSTM do not support DB mode without also updating the base reg.
666 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
667 // Check if this is a supported opcode before inserting instructions to
668 // calculate a new base register.
669 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
670
671 // If starting offset isn't zero, insert a MI to materialize a new base.
672 // But only do so if it is cost effective, i.e. merging more than two
673 // loads / stores.
674 if (NumRegs <= 2)
675 return nullptr;
676
677 // On Thumb1, it's not worth materializing a new base register without
678 // clobbering the CPSR (i.e. not using ADDS/SUBS).
679 if (!SafeToClobberCPSR)
680 return nullptr;
681
682 unsigned NewBase;
683 if (isi32Load(Opcode)) {
684 // If it is a load, then just use one of the destination registers
685 // as the new base. Will no longer be writeback in Thumb1.
686 NewBase = Regs[NumRegs-1].first;
687 Writeback = false;
688 } else {
689 // Find a free register that we can use as scratch register.
690 moveLiveRegsBefore(MBB, InsertBefore);
691 // The merged instruction does not exist yet but will use several Regs if
692 // it is a Store.
693 if (!isLoadSingle(Opcode))
694 for (const std::pair<unsigned, bool> &R : Regs)
695 LiveRegs.addReg(R.first);
696
697 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
698 if (NewBase == 0)
699 return nullptr;
700 }
701
702 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
703 : ARM::t2ADDri)
704 : (isThumb1 && Base == ARM::SP)
705 ? ARM::tADDrSPi
706 : (isThumb1 && Offset < 8)
707 ? ARM::tADDi3
708 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
709
710 if (Offset < 0) {
711 // FIXME: There are no Thumb1 load/store instructions with negative
712 // offsets. So the Base != ARM::SP might be unnecessary.
713 Offset = -Offset;
714 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
715 : ARM::t2SUBri)
716 : (isThumb1 && Offset < 8 && Base != ARM::SP)
717 ? ARM::tSUBi3
718 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
719 }
720
721 if (!TL->isLegalAddImmediate(Offset))
722 // FIXME: Try add with register operand?
723 return nullptr; // Probably not worth it then.
724
725 // We can only append a kill flag to the add/sub input if the value is not
726 // used in the register list of the stm as well.
727 bool KillOldBase = BaseKill &&
728 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
729
730 if (isThumb1) {
731 // Thumb1: depending on immediate size, use either
732 // ADDS NewBase, Base, #imm3
733 // or
734 // MOV NewBase, Base
735 // ADDS NewBase, #imm8.
736 if (Base != NewBase &&
737 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
738 // Need to insert a MOV to the new base first.
739 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
740 !STI->hasV6Ops()) {
741 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
742 if (Pred != ARMCC::AL)
743 return nullptr;
744 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
745 .addReg(Base, getKillRegState(KillOldBase));
746 } else
747 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
748 .addReg(Base, getKillRegState(KillOldBase))
749 .add(predOps(Pred, PredReg));
750
751 // The following ADDS/SUBS becomes an update.
752 Base = NewBase;
753 KillOldBase = true;
754 }
755 if (BaseOpc == ARM::tADDrSPi) {
756 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
757 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
758 .addReg(Base, getKillRegState(KillOldBase))
759 .addImm(Offset / 4)
760 .add(predOps(Pred, PredReg));
761 } else
762 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
763 .add(t1CondCodeOp(true))
764 .addReg(Base, getKillRegState(KillOldBase))
765 .addImm(Offset)
766 .add(predOps(Pred, PredReg));
767 } else {
768 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
769 .addReg(Base, getKillRegState(KillOldBase))
770 .addImm(Offset)
771 .add(predOps(Pred, PredReg))
772 .add(condCodeOp());
773 }
774 Base = NewBase;
775 BaseKill = true; // New base is always killed straight away.
776 }
777
778 bool isDef = isLoadSingle(Opcode);
779
780 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
781 // base register writeback.
782 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
783 if (!Opcode)
784 return nullptr;
785
786 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
787 // - There is no writeback (LDM of base register),
788 // - the base register is killed by the merged instruction,
789 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
790 // to reset the base register.
791 // Otherwise, don't merge.
792 // It's safe to return here since the code to materialize a new base register
793 // above is also conditional on SafeToClobberCPSR.
794 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
795 return nullptr;
796
798
799 if (Writeback) {
800 assert(isThumb1 && "expected Writeback only inThumb1");
801 if (Opcode == ARM::tLDMIA) {
802 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
803 // Update tLDMIA with writeback if necessary.
804 Opcode = ARM::tLDMIA_UPD;
805 }
806
807 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
808
809 // Thumb1: we might need to set base writeback when building the MI.
810 MIB.addReg(Base, getDefRegState(true))
811 .addReg(Base, getKillRegState(BaseKill));
812
813 // The base isn't dead after a merged instruction with writeback.
814 // Insert a sub instruction after the newly formed instruction to reset.
815 if (!BaseKill)
816 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
817 } else {
818 // No writeback, simply build the MachineInstr.
819 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
820 MIB.addReg(Base, getKillRegState(BaseKill));
821 }
822
823 MIB.addImm(Pred).addReg(PredReg);
824
825 for (const std::pair<unsigned, bool> &R : Regs)
826 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
827
828 MIB.cloneMergedMemRefs(Instrs);
829
830 return MIB.getInstr();
831}
832
833MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
835 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
836 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
837 ArrayRef<std::pair<unsigned, bool>> Regs,
838 ArrayRef<MachineInstr*> Instrs) const {
839 bool IsLoad = isi32Load(Opcode);
840 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
841 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
842
843 assert(Regs.size() == 2);
844 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
845 TII->get(LoadStoreOpcode));
846 if (IsLoad) {
847 MIB.addReg(Regs[0].first, RegState::Define)
848 .addReg(Regs[1].first, RegState::Define);
849 } else {
850 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
851 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
852 }
853 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
854 MIB.cloneMergedMemRefs(Instrs);
855 return MIB.getInstr();
856}
857
858/// Call MergeOps and update MemOps and merges accordingly on success.
859MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
860 const MachineInstr *First = Cand.Instrs.front();
861 unsigned Opcode = First->getOpcode();
862 bool IsLoad = isLoadSingle(Opcode);
865 DenseSet<unsigned> KilledRegs;
866 DenseSet<unsigned> UsedRegs;
867 // Determine list of registers and list of implicit super-register defs.
868 for (const MachineInstr *MI : Cand.Instrs) {
869 const MachineOperand &MO = getLoadStoreRegOp(*MI);
870 Register Reg = MO.getReg();
871 bool IsKill = MO.isKill();
872 if (IsKill)
873 KilledRegs.insert(Reg);
874 Regs.push_back(std::make_pair(Reg, IsKill));
875 UsedRegs.insert(Reg);
876
877 if (IsLoad) {
878 // Collect any implicit defs of super-registers; after merging we can't
879 // be sure anymore that we properly preserved these live ranges and must
880 // remove these implicit operands.
881 for (const MachineOperand &MO : MI->implicit_operands()) {
882 if (!MO.isReg() || !MO.isDef() || MO.isDead())
883 continue;
884 assert(MO.isImplicit());
885 Register DefReg = MO.getReg();
886
887 if (is_contained(ImpDefs, DefReg))
888 continue;
889 // We can ignore cases where the super-reg is read and written.
890 if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
891 continue;
892 ImpDefs.push_back(DefReg);
893 }
894 }
895 }
896
897 // Attempt the merge.
898 using iterator = MachineBasicBlock::iterator;
899
900 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
901 iterator InsertBefore = std::next(iterator(LatestMI));
902 MachineBasicBlock &MBB = *LatestMI->getParent();
903 unsigned Offset = getMemoryOpOffset(*First);
905 bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
906 Register PredReg;
907 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
908 DebugLoc DL = First->getDebugLoc();
909 MachineInstr *Merged = nullptr;
910 if (Cand.CanMergeToLSDouble)
911 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
912 Opcode, Pred, PredReg, DL, Regs,
913 Cand.Instrs);
914 if (!Merged && Cand.CanMergeToLSMulti)
915 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
916 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
917 if (!Merged)
918 return nullptr;
919
920 // Determine the earliest instruction that will get removed. We then keep an
921 // iterator just above it so the following erases don't invalidate it.
922 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
923 bool EarliestAtBegin = false;
924 if (EarliestI == MBB.begin()) {
925 EarliestAtBegin = true;
926 } else {
927 EarliestI = std::prev(EarliestI);
928 }
929
930 // Remove instructions which have been merged.
931 for (MachineInstr *MI : Cand.Instrs)
932 MBB.erase(MI);
933
934 // Determine range between the earliest removed instruction and the new one.
935 if (EarliestAtBegin)
936 EarliestI = MBB.begin();
937 else
938 EarliestI = std::next(EarliestI);
939 auto FixupRange = make_range(EarliestI, iterator(Merged));
940
941 if (isLoadSingle(Opcode)) {
942 // If the previous loads defined a super-reg, then we have to mark earlier
943 // operands undef; Replicate the super-reg def on the merged instruction.
944 for (MachineInstr &MI : FixupRange) {
945 for (unsigned &ImpDefReg : ImpDefs) {
946 for (MachineOperand &MO : MI.implicit_operands()) {
947 if (!MO.isReg() || MO.getReg() != ImpDefReg)
948 continue;
949 if (MO.readsReg())
950 MO.setIsUndef();
951 else if (MO.isDef())
952 ImpDefReg = 0;
953 }
954 }
955 }
956
957 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
958 for (unsigned ImpDef : ImpDefs)
959 MIB.addReg(ImpDef, RegState::ImplicitDefine);
960 } else {
961 // Remove kill flags: We are possibly storing the values later now.
962 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
963 for (MachineInstr &MI : FixupRange) {
964 for (MachineOperand &MO : MI.uses()) {
965 if (!MO.isReg() || !MO.isKill())
966 continue;
967 if (UsedRegs.count(MO.getReg()))
968 MO.setIsKill(false);
969 }
970 }
971 assert(ImpDefs.empty());
972 }
973
974 return Merged;
975}
976
978 unsigned Value = abs(Offset);
979 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
980 // multiplied by 4.
981 return (Value % 4) == 0 && Value < 1024;
982}
983
984/// Return true for loads/stores that can be combined to a double/multi
985/// operation without increasing the requirements for alignment.
// NOTE(review): the first line of the signature (name and leading
// parameter) is absent from this listing.
987 const MachineInstr &MI) {
988 // vldr/vstr trap on misaligned pointers anyway, so forming vldm makes no
989 // difference: anything that is not an i32 load/store is always accepted.
990 unsigned Opcode = MI.getOpcode();
991 if (!isi32Load(Opcode) && !isi32Store(Opcode))
992 return true;
993
994 // Stack pointer alignment is out of the programmer's control so we can trust
995 // SP-relative loads/stores.
// NOTE(review): the second half of the condition below is absent from this
// listing; only the "base register is SP" test is visible.
996 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
998 return true;
999 return false;
1000}
1001
1002/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
///
/// Starting at index 0, repeatedly groups consecutive entries of \p MemOps
/// whose offsets each advance by exactly one transfer size, and appends one
/// MergeCandidate per group to Candidates. A group of a single entry is still
/// recorded, but with both merge flags cleared.
1003void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
// Opcode and transfer size are taken from the first entry only and reused
// for every entry examined below.
1004 const MachineInstr *FirstMI = MemOps[0].MI;
1005 unsigned Opcode = FirstMI->getOpcode();
1006 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
1007 unsigned Size = getLSMultipleTransferSize(FirstMI);
1008
1009 unsigned SIndex = 0;
1010 unsigned EIndex = MemOps.size();
1011 do {
1012 // Look at the first instruction.
1013 const MachineInstr *MI = MemOps[SIndex].MI;
1014 int Offset = MemOps[SIndex].Offset;
1015 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1016 Register PReg = PMO.getReg();
// An undef register operand is given the largest possible number, so the
// "RegNum <= PRegNum" ordering test below rejects any successor for a
// multiple.
1017 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1018 : TRI->getEncodingValue(PReg);
1019 unsigned Latest = SIndex;
1020 unsigned Earliest = SIndex;
1021 unsigned Count = 1;
1022 bool CanMergeToLSDouble =
1023 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1024 // ARM errata 602117: LDRD with base in list may result in incorrect base
1025 // register when interrupted or faulted.
1026 if (STI->isCortexM3() && isi32Load(Opcode) &&
1027 PReg == getLoadStoreBaseOp(*MI).getReg())
1028 CanMergeToLSDouble = false;
1029
1030 bool CanMergeToLSMulti = true;
1031 // On subtargets with slow odd registers (Swift), do not form a vldm/vstm
1032 // starting with an odd register number, as that needs more uops than
// single vldrs.
1033 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1034 CanMergeToLSMulti = false;
1035
1036 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1037 // deprecated; LDM to PC is fine but cannot happen here.
1038 if (PReg == ARM::SP || PReg == ARM::PC)
1039 CanMergeToLSMulti = CanMergeToLSDouble = false;
1040
1041 // Should we be conservative?
// NOTE(review): the condition guarding the next assignment (listing line
// 1042) is absent from this listing; as shown the assignment would be
// unconditional. Confirm against the full file.
1043 CanMergeToLSMulti = CanMergeToLSDouble = false;
1044
1045 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// NOTE(review): only the D-variant limit is enforced by this switch; every
// other opcode, including the S variants, gets UINT_MAX.
1046 unsigned Limit;
1047 switch (Opcode) {
1048 default:
1049 Limit = UINT_MAX;
1050 break;
1051 case ARM::VLDRD:
1052 case ARM::VSTRD:
1053 Limit = 16;
1054 break;
1055 }
1056
1057 // Merge following instructions where possible.
1058 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
// The next entry must sit exactly one transfer size above the previous one.
1059 int NewOffset = MemOps[I].Offset;
1060 if (NewOffset != Offset + (int)Size)
1061 break;
1062 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1063 Register Reg = MO.getReg();
1064 if (Reg == ARM::SP || Reg == ARM::PC)
1065 break;
1066 if (Count == Limit)
1067 break;
1068
1069 // See if the current load/store may be part of a multi load/store.
1070 unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1071 : TRI->getEncodingValue(Reg);
1072 bool PartOfLSMulti = CanMergeToLSMulti;
1073 if (PartOfLSMulti) {
1074 // Register numbers must be in ascending order.
1075 if (RegNum <= PRegNum)
1076 PartOfLSMulti = false;
1077 // For VFP / NEON load/store multiples, the registers must be
1078 // consecutive and within the limit on the number of registers per
1079 // instruction.
1080 else if (!isNotVFP && RegNum != PRegNum+1)
1081 PartOfLSMulti = false;
1082 }
1083 // See if the current load/store may be part of a double load/store.
// A double only ever covers two entries, hence the Count <= 1 test.
1084 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1085
1086 if (!PartOfLSMulti && !PartOfLSDouble)
1087 break;
1088 CanMergeToLSMulti &= PartOfLSMulti;
1089 CanMergeToLSDouble &= PartOfLSDouble;
1090 // Track MemOp with latest and earliest position (Positions are
1091 // counted in reverse).
1092 unsigned Position = MemOps[I].Position;
1093 if (Position < MemOps[Latest].Position)
1094 Latest = I;
1095 else if (Position > MemOps[Earliest].Position)
1096 Earliest = I;
1097 // Prepare for next MemOp.
1098 Offset += Size;
1099 PRegNum = RegNum;
1100 }
1101
1102 // Form a candidate from the Ops collected so far.
1103 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1104 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1105 Candidate->Instrs.push_back(MemOps[C].MI);
// The Latest/Earliest indices are stored relative to the candidate's own
// Instrs vector, not to MemOps.
1106 Candidate->LatestMIIdx = Latest - SIndex;
1107 Candidate->EarliestMIIdx = Earliest - SIndex;
1108 Candidate->InsertPos = MemOps[Latest].Position;
// A lone instruction has nothing to merge with.
1109 if (Count == 1)
1110 CanMergeToLSMulti = CanMergeToLSDouble = false;
1111 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1112 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1113 Candidates.push_back(Candidate);
1114 // Continue after the chain.
1115 SIndex += Count;
1116 } while (SIndex < EIndex);
1117}
1118
1119static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1120 ARM_AM::AMSubMode Mode) {
1121 switch (Opc) {
1122 default: llvm_unreachable("Unhandled opcode!");
1123 case ARM::LDMIA:
1124 case ARM::LDMDA:
1125 case ARM::LDMDB:
1126 case ARM::LDMIB:
1127 switch (Mode) {
1128 default: llvm_unreachable("Unhandled submode!");
1129 case ARM_AM::ia: return ARM::LDMIA_UPD;
1130 case ARM_AM::ib: return ARM::LDMIB_UPD;
1131 case ARM_AM::da: return ARM::LDMDA_UPD;
1132 case ARM_AM::db: return ARM::LDMDB_UPD;
1133 }
1134 case ARM::STMIA:
1135 case ARM::STMDA:
1136 case ARM::STMDB:
1137 case ARM::STMIB:
1138 switch (Mode) {
1139 default: llvm_unreachable("Unhandled submode!");
1140 case ARM_AM::ia: return ARM::STMIA_UPD;
1141 case ARM_AM::ib: return ARM::STMIB_UPD;
1142 case ARM_AM::da: return ARM::STMDA_UPD;
1143 case ARM_AM::db: return ARM::STMDB_UPD;
1144 }
1145 case ARM::t2LDMIA:
1146 case ARM::t2LDMDB:
1147 switch (Mode) {
1148 default: llvm_unreachable("Unhandled submode!");
1149 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1150 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1151 }
1152 case ARM::t2STMIA:
1153 case ARM::t2STMDB:
1154 switch (Mode) {
1155 default: llvm_unreachable("Unhandled submode!");
1156 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1157 case ARM_AM::db: return ARM::t2STMDB_UPD;
1158 }
1159 case ARM::VLDMSIA:
1160 switch (Mode) {
1161 default: llvm_unreachable("Unhandled submode!");
1162 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1163 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1164 }
1165 case ARM::VLDMDIA:
1166 switch (Mode) {
1167 default: llvm_unreachable("Unhandled submode!");
1168 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1169 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1170 }
1171 case ARM::VSTMSIA:
1172 switch (Mode) {
1173 default: llvm_unreachable("Unhandled submode!");
1174 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1175 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1176 }
1177 case ARM::VSTMDIA:
1178 switch (Mode) {
1179 default: llvm_unreachable("Unhandled submode!");
1180 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1181 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1182 }
1183 }
1184}
1185
1186/// Check if the given instruction increments or decrements a register and
1187/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1188/// generated by the instruction are possibly read as well.
///
/// The result is signed: positive for the add opcodes and negative for the
/// sub opcodes handled below. 0 is also returned for any other opcode, and
/// when operand 0 or operand 1 is not \p Reg or the predicate differs.
// NOTE(review): the first line of the signature (listing line 1189) is absent
// from this listing.
1190 ARMCC::CondCodes Pred, Register PredReg) {
1191 bool CheckCPSRDef;
// Scale is multiplied into the immediate operand; its sign encodes add vs.
// sub. The tADDi8/tSUBi8/tADDspi/tSUBspi forms use a magnitude of 4, the
// others 1.
1192 int Scale;
1193 switch (MI.getOpcode()) {
1194 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1195 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1196 case ARM::t2SUBri:
1197 case ARM::t2SUBspImm:
1198 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1199 case ARM::t2ADDri:
1200 case ARM::t2ADDspImm:
1201 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1202 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1203 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1204 default: return 0;
1205 }
1206
// Only "Reg = Reg +/- imm" under the same predicate qualifies.
1207 Register MIPredReg;
1208 if (MI.getOperand(0).getReg() != Reg ||
1209 MI.getOperand(1).getReg() != Reg ||
1210 getInstrPredicate(MI, MIPredReg) != Pred ||
1211 MIPredReg != PredReg)
1212 return 0;
1213
// Reject flag-setting forms for the opcodes that asked for the check.
1214 if (CheckCPSRDef && definesCPSR(MI))
1215 return 0;
1216 return MI.getOperand(2).getImm() * Scale;
1217}
1218
1219/// Searches for an increment or decrement of \p Reg before \p MBBI.
///
/// On success returns the iterator of the matching instruction and stores its
/// signed amount in \p Offset; otherwise returns EndMBBI with \p Offset == 0.
/// Only the closest preceding non-debug instruction is examined.
// NOTE(review): the start of the signature and the declarations of BeginMBBI
// and EndMBBI (listing lines 1220-1221 and 1224-1226) are absent from this
// listing.
1222 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
1223 Offset = 0;
1227 if (MBBI == BeginMBBI)
1228 return EndMBBI;
1229
1230 // Skip debug values.
1231 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1232 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1233 --PrevMBBI;
1234
1235 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1236 return Offset == 0 ? EndMBBI : PrevMBBI;
1237}
1238
1239/// Searches for an increment or decrement of \p Reg after \p MBBI.
///
/// On success returns the iterator of the matching instruction and stores its
/// signed amount in \p Offset; otherwise returns EndMBBI with \p Offset == 0.
/// The scan stops at the first non-debug instruction that is not a match when
/// \p Reg is SP, or at the first instruction that reads or defines \p Reg.
// NOTE(review): the start of the signature and the declaration of EndMBBI
// (listing lines 1240-1241 and 1245-1246) are absent from this listing.
1242 ARMCC::CondCodes Pred, Register PredReg, int &Offset,
1243 const TargetRegisterInfo *TRI) {
1244 Offset = 0;
1247 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1248 while (NextMBBI != EndMBBI) {
1249 // Skip debug values.
1250 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1251 ++NextMBBI;
1252 if (NextMBBI == EndMBBI)
1253 return EndMBBI;
1254
// NOTE(review): isIncrementOrDecrement can yield a negative amount (its
// Scale may be negative), yet the result is held in an unsigned here before
// being stored into the int Offset. Consider making Off an int.
1255 unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1256 if (Off) {
1257 Offset = Off;
1258 return NextMBBI;
1259 }
1260
1261 // SP can only be combined if it is the next instruction after the original
1262 // MBBI, otherwise we may be incrementing the stack pointer (invalidating
1263 // anything below the new pointer) when its frame elements are still in
1264 // use. Other registers can attempt to look further, until a different use
1265 // or def of the register is found.
1266 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1267 NextMBBI->definesRegister(Reg, TRI))
1268 return EndMBBI;
1269
1270 ++NextMBBI;
1271 }
1272 return EndMBBI;
1273}
1274
1275/// Fold preceding/trailing inc/dec of base register into the
1276/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1277///
1278/// stmia rn, <ra, rb, rc>
1279/// rn := rn + 4 * 3;
1280/// =>
1281/// stmia rn!, <ra, rb, rc>
1282///
1283/// rn := rn - 4 * 3;
1284/// ldmia rn, <ra, rb, rc>
1285/// =>
1286/// ldmdb rn!, <ra, rb, rc>
///
/// Returns true if \p MI was replaced by an updating form (in which case
/// \p MI has been erased), false if nothing was changed.
1287bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1288 // Thumb1 is already using updating loads/stores.
1289 if (isThumb1) return false;
1290 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1291
1292 const MachineOperand &BaseOP = MI->getOperand(0);
1293 Register Base = BaseOP.getReg();
1294 bool BaseKill = BaseOP.isKill();
1295 Register PredReg;
1296 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1297 unsigned Opcode = MI->getOpcode();
1298 DebugLoc DL = MI->getDebugLoc();
1299
1300 // Can't use an updating ld/st if the base register is also a dest
1301 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1302 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1303 if (MO.getReg() == Base)
1304 return false;
1305
1306 int Bytes = getLSMultipleTransferSize(MI);
1307 MachineBasicBlock &MBB = *MI->getParent();
// NOTE(review): the declarations of MBBI, MergeInstr and Mode (listing lines
// 1308, 1310 and 1312) are absent from this listing.
1309 int Offset;
1311 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
// A preceding decrement of exactly the transfer size turns ia into db and ib
// into da.
1313 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1314 Mode = ARM_AM::db;
1315 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1316 Mode = ARM_AM::da;
1317 } else {
// Otherwise look for a trailing update whose sign matches the direction of
// the current sub-mode.
1318 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1319 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1320 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1321
1322 // We couldn't find an inc/dec to merge. But if the base is dead, we
1323 // can still change to a writeback form as that will save us 2 bytes
1324 // of code size. It can create WAW hazards though, so only do it if
1325 // we're minimizing code size.
1326 if (!STI->hasMinSize() || !BaseKill)
1327 return false;
1328
// The size-only conversion is abandoned if any transferred register is R8
// or above.
1329 bool HighRegsUsed = false;
1330 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1331 if (MO.getReg() >= ARM::R8) {
1332 HighRegsUsed = true;
1333 break;
1334 }
1335
// MBB.end() marks "no increment instruction to erase" for the code below.
1336 if (!HighRegsUsed)
1337 MergeInstr = MBB.end();
1338 else
1339 return false;
1340 }
1341 }
1342 if (MergeInstr != MBB.end()) {
1343 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1344 MBB.erase(MergeInstr);
1345 }
1346
1347 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1348 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1349 .addReg(Base, getDefRegState(true)) // WB base register
1350 .addReg(Base, getKillRegState(BaseKill))
1351 .addImm(Pred).addReg(PredReg);
1352
1353 // Transfer the rest of operands.
1354 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
1355 MIB.add(MO);
1356
1357 // Transfer memoperands.
1358 MIB.setMemRefs(MI->memoperands());
1359
1360 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1361 MBB.erase(MBBI);
1362 return true;
1363}
1364
1365static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1366 ARM_AM::AddrOpc Mode) {
1367 switch (Opc) {
1368 case ARM::LDRi12:
1369 return ARM::LDR_PRE_IMM;
1370 case ARM::STRi12:
1371 return ARM::STR_PRE_IMM;
1372 case ARM::VLDRS:
1373 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1374 case ARM::VLDRD:
1375 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1376 case ARM::VSTRS:
1377 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1378 case ARM::VSTRD:
1379 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1380 case ARM::t2LDRi8:
1381 case ARM::t2LDRi12:
1382 return ARM::t2LDR_PRE;
1383 case ARM::t2STRi8:
1384 case ARM::t2STRi12:
1385 return ARM::t2STR_PRE;
1386 default: llvm_unreachable("Unhandled opcode!");
1387 }
1388}
1389
1390static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1391 ARM_AM::AddrOpc Mode) {
1392 switch (Opc) {
1393 case ARM::LDRi12:
1394 return ARM::LDR_POST_IMM;
1395 case ARM::STRi12:
1396 return ARM::STR_POST_IMM;
1397 case ARM::VLDRS:
1398 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1399 case ARM::VLDRD:
1400 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1401 case ARM::VSTRS:
1402 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1403 case ARM::VSTRD:
1404 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1405 case ARM::t2LDRi8:
1406 case ARM::t2LDRi12:
1407 return ARM::t2LDR_POST;
1408 case ARM::t2LDRBi8:
1409 case ARM::t2LDRBi12:
1410 return ARM::t2LDRB_POST;
1411 case ARM::t2LDRSBi8:
1412 case ARM::t2LDRSBi12:
1413 return ARM::t2LDRSB_POST;
1414 case ARM::t2LDRHi8:
1415 case ARM::t2LDRHi12:
1416 return ARM::t2LDRH_POST;
1417 case ARM::t2LDRSHi8:
1418 case ARM::t2LDRSHi12:
1419 return ARM::t2LDRSH_POST;
1420 case ARM::t2STRi8:
1421 case ARM::t2STRi12:
1422 return ARM::t2STR_POST;
1423 case ARM::t2STRBi8:
1424 case ARM::t2STRBi12:
1425 return ARM::t2STRB_POST;
1426 case ARM::t2STRHi8:
1427 case ARM::t2STRHi12:
1428 return ARM::t2STRH_POST;
1429
1430 case ARM::MVE_VLDRBS16:
1431 return ARM::MVE_VLDRBS16_post;
1432 case ARM::MVE_VLDRBS32:
1433 return ARM::MVE_VLDRBS32_post;
1434 case ARM::MVE_VLDRBU16:
1435 return ARM::MVE_VLDRBU16_post;
1436 case ARM::MVE_VLDRBU32:
1437 return ARM::MVE_VLDRBU32_post;
1438 case ARM::MVE_VLDRHS32:
1439 return ARM::MVE_VLDRHS32_post;
1440 case ARM::MVE_VLDRHU32:
1441 return ARM::MVE_VLDRHU32_post;
1442 case ARM::MVE_VLDRBU8:
1443 return ARM::MVE_VLDRBU8_post;
1444 case ARM::MVE_VLDRHU16:
1445 return ARM::MVE_VLDRHU16_post;
1446 case ARM::MVE_VLDRWU32:
1447 return ARM::MVE_VLDRWU32_post;
1448 case ARM::MVE_VSTRB16:
1449 return ARM::MVE_VSTRB16_post;
1450 case ARM::MVE_VSTRB32:
1451 return ARM::MVE_VSTRB32_post;
1452 case ARM::MVE_VSTRH32:
1453 return ARM::MVE_VSTRH32_post;
1454 case ARM::MVE_VSTRBU8:
1455 return ARM::MVE_VSTRBU8_post;
1456 case ARM::MVE_VSTRHU16:
1457 return ARM::MVE_VSTRHU16_post;
1458 case ARM::MVE_VSTRWU32:
1459 return ARM::MVE_VSTRWU32_post;
1460
1461 default: llvm_unreachable("Unhandled opcode!");
1462 }
1463}
1464
1465/// Fold preceding/trailing inc/dec of base register into the
1466/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// Returns true if \p MI was replaced by a pre- or post-indexed form (in
/// which case \p MI and the inc/dec instruction have been erased), false if
/// nothing was changed.
1467bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1468 // Thumb1 doesn't have updating LDR/STR.
1469 // FIXME: Use LDM/STM with single register instead.
1470 if (isThumb1) return false;
1471 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1472
// NOTE(review): the declaration of Base (listing line 1473) is absent from
// this listing.
1474 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1475 unsigned Opcode = MI->getOpcode();
1476 DebugLoc DL = MI->getDebugLoc();
1477 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1478 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1479 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only accesses with a zero immediate offset are candidates.
1480 if (isi32Load(Opcode) || isi32Store(Opcode))
1481 if (MI->getOperand(2).getImm() != 0)
1482 return false;
1483 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1484 return false;
1485
1486 // Can't do the merge if the destination register is the same as the would-be
1487 // writeback register.
1488 if (MI->getOperand(0).getReg() == Base)
1489 return false;
1490
1491 Register PredReg;
1492 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1493 int Bytes = getLSMultipleTransferSize(MI);
1494 MachineBasicBlock &MBB = *MI->getParent();
// NOTE(review): the declarations of MBBI and MergeInstr (listing lines 1495
// and 1497) are absent from this listing.
1496 int Offset;
1498 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1499 unsigned NewOpc;
// A preceding update of exactly the access size becomes a pre-indexed form;
// for the VFP (AM5) opcodes only the decrement case is accepted here.
1500 if (!isAM5 && Offset == Bytes) {
1501 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1502 } else if (Offset == -Bytes) {
1503 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1504 } else {
1505 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1506 if (MergeInstr == MBB.end())
1507 return false;
1508
// NOTE(review): the statements that assign NewOpc in this branch (listing
// lines 1509 and 1512) are absent from this listing, so the two nested
// legality checks below cannot be fully read from here.
1510 if ((isAM5 && Offset != Bytes) ||
1511 (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
1513 if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
1514 return false;
1515 }
1516 }
1517 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1518 MBB.erase(MergeInstr);
1519
// NOTE(review): listing line 1520 is absent here.
1521
1522 bool isLd = isLoadSingle(Opcode);
1523 if (isAM5) {
1524 // VLDM[SD]_UPD, VSTM[SD]_UPD
1525 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1526 // updating load/store-multiple instructions can be used with only one
1527 // register.)
1528 MachineOperand &MO = MI->getOperand(0);
1529 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1530 .addReg(Base, getDefRegState(true)) // WB base register
1531 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1532 .addImm(Pred)
1533 .addReg(PredReg)
1534 .addReg(MO.getReg(), (isLd ? getDefRegState(true)
1535 : getKillRegState(MO.isKill())))
1536 .cloneMemRefs(*MI);
1537 (void)MIB;
1538 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1539 } else if (isLd) {
1540 if (isAM2) {
1541 // LDR_PRE, LDR_POST
1542 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
// NOTE(review): one builder line (listing line 1545) is absent from the
// chain below.
1543 auto MIB =
1544 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1546 .addReg(Base)
1547 .addImm(Offset)
1548 .addImm(Pred)
1549 .addReg(PredReg)
1550 .cloneMemRefs(*MI);
1551 (void)MIB;
1552 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1553 } else {
// NOTE(review): the declaration of Imm (listing line 1554) and one builder
// line (listing line 1557) are absent from this listing.
1555 auto MIB =
1556 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1558 .addReg(Base)
1559 .addReg(0)
1560 .addImm(Imm)
1561 .add(predOps(Pred, PredReg))
1562 .cloneMemRefs(*MI);
1563 (void)MIB;
1564 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1565 }
1566 } else {
1567 // t2LDR_PRE, t2LDR_POST
// NOTE(review): one builder line (listing line 1570) is absent from the
// chain below.
1568 auto MIB =
1569 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1571 .addReg(Base)
1572 .addImm(Offset)
1573 .add(predOps(Pred, PredReg))
1574 .cloneMemRefs(*MI);
1575 (void)MIB;
1576 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1577 }
1578 } else {
1579 MachineOperand &MO = MI->getOperand(0);
1580 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1581 // the vestigial zero-reg offset register. When that's fixed, this clause
1582 // can be removed entirely.
1583 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
// NOTE(review): the declaration of Imm (listing line 1584) is absent from
// this listing.
1585 // STR_PRE, STR_POST
1586 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1587 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1588 .addReg(Base)
1589 .addReg(0)
1590 .addImm(Imm)
1591 .add(predOps(Pred, PredReg))
1592 .cloneMemRefs(*MI);
1593 (void)MIB;
1594 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1595 } else {
1596 // t2STR_PRE, t2STR_POST
1597 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1598 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1599 .addReg(Base)
1600 .addImm(Offset)
1601 .add(predOps(Pred, PredReg))
1602 .cloneMemRefs(*MI);
1603 (void)MIB;
1604 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1605 }
1606 }
// The original single load/store is replaced by the instruction built above.
1607 MBB.erase(MBBI);
1608
1609 return true;
1610}
1611
/// Fold an adjacent increment/decrement of the base register into a
/// t2LDRDi8/t2STRDi8 that has a zero immediate, producing the pre- or
/// post-indexed form. Returns true if \p MI was replaced (and erased), false
/// if nothing was changed.
1612bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1613 unsigned Opcode = MI.getOpcode();
1614 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1615 "Must have t2STRDi8 or t2LDRDi8");
// Only accesses with a zero immediate offset are candidates.
1616 if (MI.getOperand(3).getImm() != 0)
1617 return false;
1618 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1619
1620 // Behaviour for writeback is undefined if base register is the same as one
1621 // of the others.
1622 const MachineOperand &BaseOp = MI.getOperand(2);
1623 Register Base = BaseOp.getReg();
1624 const MachineOperand &Reg0Op = MI.getOperand(0);
1625 const MachineOperand &Reg1Op = MI.getOperand(1);
1626 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1627 return false;
1628
1629 Register PredReg;
1630 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
// NOTE(review): the declaration of MBBI (listing line 1631) and the start of
// the findIncDecBefore call that initialises MergeInstr (listing line 1634)
// are absent from this listing.
1632 MachineBasicBlock &MBB = *MI.getParent();
1633 int Offset;
1635 PredReg, Offset);
1636 unsigned NewOpc;
// A preceding +/-8 update becomes the pre-indexed form; otherwise a trailing
// update with an encodable immediate becomes the post-indexed form.
1637 if (Offset == 8 || Offset == -8) {
1638 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1639 } else {
1640 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1641 if (MergeInstr == MBB.end())
1642 return false;
1643 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1644 if (!isLegalAddressImm(NewOpc, Offset, TII))
1645 return false;
1646 }
1647 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1648 MBB.erase(MergeInstr);
1649
1650 DebugLoc DL = MI.getDebugLoc();
1651 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Operand order differs: loads list the two data registers before the
// written-back base, stores list the written-back base first.
1652 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1653 MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1654 } else {
1655 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1656 MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1657 }
1658 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1659 .addImm(Offset).addImm(Pred).addReg(PredReg);
1660 assert(TII->get(Opcode).getNumOperands() == 6 &&
1661 TII->get(NewOpc).getNumOperands() == 7 &&
1662 "Unexpected number of operands in Opcode specification.");
1663
1664 // Transfer implicit operands.
1665 for (const MachineOperand &MO : MI.implicit_operands())
1666 MIB.add(MO);
1667 MIB.cloneMemRefs(MI);
1668
1669 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1670 MBB.erase(MBBI);
1671 return true;
1672}
1673
1674/// Returns true if instruction is a memory operation that this pass is capable
1675/// of operating on.
1676static bool isMemoryOp(const MachineInstr &MI) {
1677 unsigned Opcode = MI.getOpcode();
1678 switch (Opcode) {
1679 case ARM::VLDRS:
1680 case ARM::VSTRS:
1681 case ARM::VLDRD:
1682 case ARM::VSTRD:
1683 case ARM::LDRi12:
1684 case ARM::STRi12:
1685 case ARM::tLDRi:
1686 case ARM::tSTRi:
1687 case ARM::tLDRspi:
1688 case ARM::tSTRspi:
1689 case ARM::t2LDRi8:
1690 case ARM::t2LDRi12:
1691 case ARM::t2STRi8:
1692 case ARM::t2STRi12:
1693 break;
1694 default:
1695 return false;
1696 }
1697 if (!MI.getOperand(1).isReg())
1698 return false;
1699
1700 // When no memory operands are present, conservatively assume unaligned,
1701 // volatile, unfoldable.
1702 if (!MI.hasOneMemOperand())
1703 return false;
1704
1705 const MachineMemOperand &MMO = **MI.memoperands_begin();
1706
1707 // Don't touch volatile memory accesses - we may be changing their order.
1708 // TODO: We could allow unordered and monotonic atomics here, but we need to
1709 // make sure the resulting ldm/stm is correctly marked as atomic.
1710 if (MMO.isVolatile() || MMO.isAtomic())
1711 return false;
1712
1713 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1714 // not.
1715 if (MMO.getAlign() < Align(4))
1716 return false;
1717
1718 // str <undef> could probably be eliminated entirely, but for now we just want
1719 // to avoid making a mess of it.
1720 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1721 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1722 return false;
1723
1724 // Likewise don't mess with references to undefined addresses.
1725 if (MI.getOperand(1).isUndef())
1726 return false;
1727
1728 return true;
1729}
1730
// Build a single load (isDef) or store of Reg using opcode NewOpc, base
// BaseReg and immediate Offset, inserted before MBBI, carrying over MI's
// memory operands.
// NOTE(review): the start of the signature (listing lines 1731-1732) is
// absent from this listing; the name InsertLDR_STR and the leading
// MBB/MBBI/Offset parameters are taken from the calls in FixInvalidRegPairOp.
1733 bool isDef, unsigned NewOpc, unsigned Reg,
1734 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1735 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1736 unsigned PredReg, const TargetInstrInfo *TII,
1737 MachineInstr *MI) {
1738 if (isDef) {
// Load: Reg is a def, and RegDeadKill is interpreted as the dead flag.
// RegUndef is not used on this path.
1739 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1740 TII->get(NewOpc))
1741 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1742 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1743 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1744 // FIXME: This is overly conservative; the new instruction accesses 4
1745 // bytes, not 8.
1746 MIB.cloneMemRefs(*MI);
1747 } else {
// Store: Reg is a use, and RegDeadKill is interpreted as the kill flag.
1748 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1749 TII->get(NewOpc))
1750 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1751 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1752 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1753 // FIXME: This is overly conservative; the new instruction accesses 4
1754 // bytes, not 8.
1755 MIB.cloneMemRefs(*MI);
1756 }
1757}
1758
/// Rewrite an LDRD/STRD/t2LDRDi8 whose register pair cannot be used as-is
/// (non-consecutive registers for LDRD/STRD, or Cortex-M3 errata 602117 for
/// loads) into either a two-register LDM/STM or two single loads/stores.
/// Returns true if the instruction was rewritten, in which case the original
/// is erased and MBBI is set to the iterator returned by the erase.
// NOTE(review): the second line of the signature (listing line 1760, which
// declares MBBI) is absent from this listing.
1759bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1761 MachineInstr *MI = &*MBBI;
1762 unsigned Opcode = MI->getOpcode();
1763 // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1764 // if we see this opcode.
1765 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1766 return false;
1767
1768 const MachineOperand &BaseOp = MI->getOperand(2);
1769 Register BaseReg = BaseOp.getReg();
1770 Register EvenReg = MI->getOperand(0).getReg();
1771 Register OddReg = MI->getOperand(1).getReg();
// The DWARF register numbers are used for the parity/adjacency tests below.
1772 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1773 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1774
1775 // ARM errata 602117: LDRD with base in list may result in incorrect base
1776 // register when interrupted or faulted.
1777 bool Errata602117 = EvenReg == BaseReg &&
1778 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1779 // ARM LDRD/STRD needs consecutive registers.
1780 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1781 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1782
// Nothing to fix if neither problem applies.
1783 if (!Errata602117 && !NonConsecutiveRegs)
1784 return false;
1785
1786 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1787 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the flag of interest on the data registers is "dead", for stores
// it is "kill".
1788 bool EvenDeadKill = isLd ?
1789 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1790 bool EvenUndef = MI->getOperand(0).isUndef();
1791 bool OddDeadKill = isLd ?
1792 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1793 bool OddUndef = MI->getOperand(1).isUndef();
1794 bool BaseKill = BaseOp.isKill();
1795 bool BaseUndef = BaseOp.isUndef();
1796 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1797 "register offset not handled below");
1798 int OffImm = getMemoryOpOffset(*MI);
1799 Register PredReg;
1800 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1801
1802 if (OddRegNum > EvenRegNum && OffImm == 0) {
1803 // Ascending register numbers and no offset. It's safe to change it to a
1804 // ldm or stm.
1805 unsigned NewOpc = (isLd)
1806 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1807 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1808 if (isLd) {
1809 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1810 .addReg(BaseReg, getKillRegState(BaseKill))
1811 .addImm(Pred).addReg(PredReg)
1812 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1813 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1814 .cloneMemRefs(*MI);
1815 ++NumLDRD2LDM;
1816 } else {
1817 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1818 .addReg(BaseReg, getKillRegState(BaseKill))
1819 .addImm(Pred).addReg(PredReg)
1820 .addReg(EvenReg,
1821 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1822 .addReg(OddReg,
1823 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1824 .cloneMemRefs(*MI);
1825 ++NumSTRD2STM;
1826 }
1827 } else {
1828 // Split into two instructions.
// NewOpc is for the access at OffImm, NewOpc2 for the one at OffImm + 4;
// for Thumb2 each picks the i8 form only when its own offset is negative.
1829 unsigned NewOpc = (isLd)
1830 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1831 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1832 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1833 // so adjust and use t2LDRi12 here for that.
1834 unsigned NewOpc2 = (isLd)
1835 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1836 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1837 // If this is a load, make sure the first load does not clobber the base
1838 // register before the second load reads it.
1839 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
// Emit the odd (OffImm + 4) load first; the base is only marked killed on
// the second instruction emitted.
1840 assert(!TRI->regsOverlap(OddReg, BaseReg));
1841 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1842 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1843 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1844 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1845 MI);
1846 } else {
1847 if (OddReg == EvenReg && EvenDeadKill) {
1848 // If the two source operands are the same, the kill marker is
1849 // probably on the first one. e.g.
1850 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1851 EvenDeadKill = false;
1852 OddDeadKill = true;
1853 }
1854 // Never kill the base register in the first instruction.
1855 if (EvenReg == BaseReg)
1856 EvenDeadKill = false;
1857 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1858 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1859 MI);
1860 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1861 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1862 MI);
1863 }
1864 if (isLd)
1865 ++NumLDRD2LDR;
1866 else
1867 ++NumSTRD2STR;
1868 }
1869
// Remove the original pair instruction; MBBI now refers to what followed it.
1870 MBBI = MBB.erase(MBBI);
1871 return true;
1872}
1873
1874/// An optimization pass to turn multiple LDR / STR ops of the same base and
1875/// incrementing offset into LDM / STM ops.
///
/// Each loop iteration inspects the instruction in front of the iterator
/// (std::prev(I)), so the block is walked towards its beginning. Memory ops
/// that share opcode, base register and predicate are collected into MemOps,
/// which is kept ordered by ascending offset. As soon as an instruction cannot
/// join the current chain, the chain is passed to FormCandidates() and the
/// chain state is reset. Afterwards the candidates are sorted by InsertPos and
/// merged, and base-register updates are folded into the results.
///
/// \returns true if a merge or a base-update fold reported a change.
///
/// NOTE(review): this listing has dropped the loop header (embedded line
/// 1886) and the declarations of `Base` and `Offset` (embedded lines 1898 and
/// 1901); both names are used below but declared on lines not shown here.
1876bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1877 MemOpQueue MemOps;
1878 unsigned CurrBase = 0;
1879 unsigned CurrOpc = ~0u;
1880 ARMCC::CondCodes CurrPred = ARMCC::AL;
1881 unsigned Position = 0;
1882 assert(Candidates.size() == 0);
1883 assert(MergeBaseCandidates.size() == 0);
1884 LiveRegsValid = false;
1885
1887 I = MBBI) {
1888 // The instruction in front of the iterator is the one we look at.
1889 MBBI = std::prev(I);
1890 if (FixInvalidRegPairOp(MBB, MBBI))
1891 continue;
1892 ++Position;
1893
1894 if (isMemoryOp(*MBBI)) {
1895 unsigned Opcode = MBBI->getOpcode();
1896 const MachineOperand &MO = MBBI->getOperand(0);
1897 Register Reg = MO.getReg();
1899 Register PredReg;
1900 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1902 if (CurrBase == 0) {
1903 // Start of a new chain.
1904 CurrBase = Base;
1905 CurrOpc = Opcode;
1906 CurrPred = Pred;
1907 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1908 continue;
1909 }
1910 // Note: No need to match PredReg in the next if.
1911 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1912 // Watch out for:
1913 // r4 := ldr [r0, #8]
1914 // r4 := ldr [r0, #4]
1915 // or
1916 // r0 := ldr [r0]
1917 // If a load overrides the base register or a register loaded by
1918 // another load in our chain, we cannot take this instruction.
1919 bool Overlap = false;
1920 if (isLoadSingle(Opcode)) {
1921 Overlap = (Base == Reg);
1922 if (!Overlap) {
1923 for (const MemOpQueueEntry &E : MemOps) {
1924 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1925 Overlap = true;
1926 break;
1927 }
1928 }
1929 }
1930 }
1931
1932 if (!Overlap) {
1933 // Check offset and sort memory operation into the current chain.
 // Fast path: a strictly larger offset is appended at the end.
1934 if (Offset > MemOps.back().Offset) {
1935 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1936 continue;
1937 } else {
 // Otherwise find the first entry with a larger offset and insert in
 // front of it; an equal offset means the chain cannot take this op.
1938 MemOpQueue::iterator MI, ME;
1939 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1940 if (Offset < MI->Offset) {
1941 // Found a place to insert.
1942 break;
1943 }
1944 if (Offset == MI->Offset) {
1945 // Collision, abort.
1946 MI = ME;
1947 break;
1948 }
1949 }
1950 if (MI != MemOps.end()) {
1951 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1952 continue;
1953 }
1954 }
1955 }
1956 }
1957
1958 // Don't advance the iterator; The op will start a new chain next.
1959 MBBI = I;
 // Undo the ++Position above, since this instruction will be visited again.
1960 --Position;
1961 // Fallthrough to look into existing chain.
1962 } else if (MBBI->isDebugInstr()) {
1963 continue;
1964 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1965 MBBI->getOpcode() == ARM::t2STRDi8) {
1966 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1967 // remember them because we may still be able to merge add/sub into them.
1968 MergeBaseCandidates.push_back(&*MBBI);
1969 }
1970
1971 // If we are here then the chain is broken; Extract candidates for a merge.
1972 if (MemOps.size() > 0) {
1973 FormCandidates(MemOps);
1974 // Reset for the next chain.
1975 CurrBase = 0;
1976 CurrOpc = ~0u;
1977 CurrPred = ARMCC::AL;
1978 MemOps.clear();
1979 }
1980 }
 // Flush the chain that was still open when the scan finished.
1981 if (MemOps.size() > 0)
1982 FormCandidates(MemOps);
1983
1984 // Sort candidates so they get processed from end to begin of the basic
1985 // block later; This is necessary for liveness calculation.
1986 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1987 return M0->InsertPos < M1->InsertPos;
1988 };
1989 llvm::sort(Candidates, LessThan);
1990
1991 // Go through list of candidates and merge.
1992 bool Changed = false;
1993 for (const MergeCandidate *Candidate : Candidates) {
1994 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1995 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1996 // Merge preceding/trailing base inc/dec into the merged op.
1997 if (Merged) {
1998 Changed = true;
1999 unsigned Opcode = Merged->getOpcode();
2000 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2001 MergeBaseUpdateLSDouble(*Merged);
2002 else
2003 MergeBaseUpdateLSMultiple(Merged);
2004 } else {
 // The merge failed; still try to fold a base update into each
 // individual load / store of the candidate.
2005 for (MachineInstr *MI : Candidate->Instrs) {
2006 if (MergeBaseUpdateLoadStore(MI))
2007 Changed = true;
2008 }
2009 }
2010 } else {
2011 assert(Candidate->Instrs.size() == 1);
2012 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2013 Changed = true;
2014 }
2015 }
2016 Candidates.clear();
2017 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
 // NOTE(review): the result of MergeBaseUpdateLSDouble() is not folded into
 // Changed here; if it reports a modification, that is dropped - confirm
 // this is intended.
2018 for (MachineInstr *MI : MergeBaseCandidates)
2019 MergeBaseUpdateLSDouble(*MI);
2020 MergeBaseCandidates.clear();
2021
2022 return Changed;
2023}
2024
2025/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2026/// into the preceding stack restore so it directly restores the value of LR
2027/// into pc.
2028/// ldmfd sp!, {..., lr}
2029/// bx lr
2030/// or
2031/// ldmfd sp!, {..., lr}
2032/// mov pc, lr
2033/// =>
2034/// ldmfd sp!, {..., pc}
///
/// \returns true if the return was folded into the preceding load-multiple.
///
/// NOTE(review): the declaration of `MBBI` (embedded line 2040) is missing
/// from this listing; the code below only shows that it is an iterator into
/// MBB that is expected to point at the return instruction.
2035bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2036 // Thumb1 LDM doesn't allow high registers.
2037 if (isThumb1) return false;
2038 if (MBB.empty()) return false;
2039
2041 if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2042 (MBBI->getOpcode() == ARM::BX_RET ||
2043 MBBI->getOpcode() == ARM::tBX_RET ||
2044 MBBI->getOpcode() == ARM::MOVPCLR)) {
2045 MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2046 // Ignore any debug instructions.
2047 while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2048 --PrevI;
2049 MachineInstr &PrevMI = *PrevI;
2050 unsigned Opcode = PrevMI.getOpcode();
2051 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2052 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2053 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
 // Only fold when the last operand of the load-multiple is LR.
2054 MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2055 if (MO.getReg() != ARM::LR)
2056 return false;
2057 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
 // Of the writeback forms accepted above, only the increment-after ones
 // have a *_RET counterpart here; anything else trips this assert.
2058 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2059 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
 // Rewrite the load-multiple in place: new opcode, LR -> PC, inherit the
 // implicit operands of the return, then delete the return itself.
2060 PrevMI.setDesc(TII->get(NewOpc));
2061 MO.setReg(ARM::PC);
2062 PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2063 MBB.erase(MBBI);
2064 return true;
2065 }
2066 }
2067 return false;
2068}
2069
// If the block's return is a tBX_RET directly preceded by a tMOVr that defines
// LR, replace the pair with a single tBX through the register that the tMOVr
// kills. Returns true if the replacement was made.
//
// NOTE(review): this listing has dropped the declarations of `MBBI` (embedded
// line 2071) and `Prev` (embedded line 2076) as well as the tail of the
// BuildMI operand chain (embedded lines 2087-2088).
2070bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2072 if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2073 MBBI->getOpcode() != ARM::tBX_RET)
2074 return false;
2075
2077 --Prev;
2078 if (Prev->getOpcode() != ARM::tMOVr ||
2079 !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2080 return false;
2081
 // The first killed use operand of the move becomes the branch target.
2082 for (auto Use : Prev->uses())
2083 if (Use.isKill()) {
2084 assert(STI->hasV4TOps());
2085 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2086 .addReg(Use.getReg(), RegState::Kill)
2089 MBB.erase(MBBI);
2090 MBB.erase(Prev);
2091 return true;
2092 }
2093
 // Reaching this point means the tMOVr had no killed use operand.
2094 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2095}
2096
// Entry point of the post-RA load / store optimizer. Caches the subtarget
// objects, then for every basic block runs LoadStoreMultipleOpti(), optionally
// MergeReturnIntoLDM() and, in Thumb1 functions, CombineMovBx(). Returns true
// if any of these reported a change.
2097bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2098 if (skipFunction(Fn.getFunction()))
2099 return false;
2100
2101 MF = &Fn;
2102 STI = &Fn.getSubtarget<ARMSubtarget>();
2103 TL = STI->getTargetLowering();
2104 AFI = Fn.getInfo<ARMFunctionInfo>();
2105 TII = STI->getInstrInfo();
2106 TRI = STI->getRegisterInfo();
2107
2108 RegClassInfoValid = false;
2109 isThumb2 = AFI->isThumb2Function();
2110 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2111
2112 bool Modified = false, ModifiedLDMReturn = false;
2113 for (MachineBasicBlock &MBB : Fn) {
2114 Modified |= LoadStoreMultipleOpti(MBB);
 // Folding the return into an LDM is skipped when the function signs its
 // return address, and requires v5T.
2115 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2116 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2117 if (isThumb1)
2118 Modified |= CombineMovBx(MBB);
2119 }
2120 Modified |= ModifiedLDMReturn;
2121
2122 // If we merged a BX instruction into an LDM, we need to re-calculate whether
2123 // LR is restored. This check needs to consider the whole function, not just
2124 // the instruction(s) we changed, because there may be other BX returns which
2125 // still need LR to be restored.
 // NOTE(review): the statement guarded by this `if` (embedded line 2127) is
 // missing from this listing. As shown, the `if` would appear to guard the
 // DestroyAll() call below, which does not match the comment above.
2126 if (ModifiedLDMReturn)
2128
2129 Allocator.DestroyAll();
2130 return Modified;
2131}
2132
// Human-readable description string for the pre-register-allocation pass.
// NOTE(review): presumably consumed by getPassName() and the INITIALIZE_PASS
// macros below; those lines are truncated in this listing - confirm.
2133#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2134 "ARM pre- register allocation load / store optimization pass"
2135
2136namespace {
2137
2138 /// Pre- register allocation pass that moves loads / stores from consecutive
2139 /// locations close together to make it more likely they will be combined
 /// later.
 ///
 /// NOTE(review): several lines of this struct are missing from the listing
 /// (embedded lines 2148-2149, 2157, 2161-2164, 2173, 2175). In particular the
 /// `MRI` and `DT` members assigned in runOnMachineFunction() and the bodies
 /// of getPassName() / getAnalysisUsage() are not visible here.
2140 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
2141 static char ID;
2142
 // Per-function state, refreshed at the start of runOnMachineFunction().
2143 AliasAnalysis *AA;
2144 const DataLayout *TD;
2145 const TargetInstrInfo *TII;
2146 const TargetRegisterInfo *TRI;
2147 const ARMSubtarget *STI;
2150 MachineFunction *MF;
2151
2152 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
2153
2154 bool runOnMachineFunction(MachineFunction &Fn) override;
2155
2156 StringRef getPassName() const override {
2158 }
2159
2160 void getAnalysisUsage(AnalysisUsage &AU) const override {
2165 }
2166
2167 private:
 // Note: the out-of-line definition names the EvenReg / OddReg parameters
 // FirstReg / SecondReg.
2168 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2169 unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2170 Register &BaseReg, int &Offset, Register &PredReg,
2171 ARMCC::CondCodes &Pred, bool &isT2);
2172 bool RescheduleOps(
2174 unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2176 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2177 bool DistributeIncrements();
2178 bool DistributeIncrements(Register Base);
2179 };
2180
2181} // end anonymous namespace
2182
// Pass identification and registration.
// NOTE(review): the INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_END invocations
// below are truncated in this listing (embedded lines 2186-2187 and 2189 are
// missing), so their remaining arguments cannot be seen here.
2183char ARMPreAllocLoadStoreOpt::ID = 0;
2184
2185INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2188INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2190
2191// Limit the number of instructions to be rescheduled.
2192// FIXME: tune this limit, and/or come up with some better heuristics.
// Hidden command-line option, default 8; RescheduleOps() stops extending a
// group once it has collected this many instructions.
2193static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2194 cl::init(8), cl::Hidden);
2195
// Entry point of the pre-RA pass. Does nothing when misaligned loads / stores
// are assumed or the function is skipped. Otherwise caches the per-function
// objects, runs DistributeIncrements() once for the whole function and then
// RescheduleLoadStoreInstrs() on every basic block. Returns true if either
// step reported a change.
2196bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2197 if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2198 return false;
2199
2200 TD = &Fn.getDataLayout();
2201 STI = &Fn.getSubtarget<ARMSubtarget>();
2202 TII = STI->getInstrInfo();
2203 TRI = STI->getRegisterInfo();
2204 MRI = &Fn.getRegInfo();
2205 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2206 MF = &Fn;
2207 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2208
2209 bool Modified = DistributeIncrements();
 // Despite its name, MFI iterates over the function's basic blocks.
2210 for (MachineBasicBlock &MFI : Fn)
2211 Modified |= RescheduleLoadStoreInstrs(&MFI);
2212
2213 return Modified;
2214}
2215
// Decide whether the memory ops in MemOps can be gathered together without
// crossing anything unsafe, and whether doing so is likely to be worthwhile.
//
// The instructions strictly after I and before E are examined (the loop
// pre-increments I, so the instruction I initially refers to is not checked).
// Debug instructions and members of MemOps are ignored. The move is rejected
// if a call, terminator or instruction with unmodeled side effects is found,
// if an intervening store (or, when moving stores, an intervening load) may
// alias one of MemOps, or if any instruction defines a register overlapping
// Base. Every other register seen that is neither Base nor in MemRegs is
// counted as added register pressure.
//
// NOTE(review): three parameter lines (embedded lines 2217-2219) are missing
// from this listing; they are expected to declare `I`, `E` and `MemOps`.
2216static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2220 SmallSet<unsigned, 4> &MemRegs,
2221 const TargetRegisterInfo *TRI,
2222 AliasAnalysis *AA) {
2223 // Are there stores / loads / calls between them?
2224 SmallSet<unsigned, 4> AddedRegPressure;
2225 while (++I != E) {
2226 if (I->isDebugInstr() || MemOps.count(&*I))
2227 continue;
2228 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2229 return false;
2230 if (I->mayStore() || (!isLd && I->mayLoad()))
2231 for (MachineInstr *MemOp : MemOps)
2232 if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2233 return false;
2234 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2235 MachineOperand &MO = I->getOperand(j);
2236 if (!MO.isReg())
2237 continue;
2238 Register Reg = MO.getReg();
2239 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2240 return false;
2241 if (Reg != Base && !MemRegs.count(Reg))
2242 AddedRegPressure.insert(Reg);
2243 }
2244 }
2245
2246 // Estimate register pressure increase due to the transformation.
2247 if (MemRegs.size() <= 4)
2248 // Ok if we are moving small number of instructions.
2249 return true;
 // Larger groups are accepted only if the number of distinct intervening
 // registers is at most twice the number of registers being loaded / stored.
2250 return AddedRegPressure.size() <= MemRegs.size() * 2;
2251}
2252
// Check whether the two single-word memory ops Op0 and Op1 can be replaced by
// one LDRD / STRD, and if so fill in the out-parameters describing it.
//
// Outputs on success: NewOpc (LDRD, STRD, t2LDRDi8 or t2STRDi8), FirstReg /
// SecondReg (operand 0 of Op0 / Op1), BaseReg (operand 1 of Op0), Offset
// (the raw immediate for Thumb2, an addrmode3 encoding otherwise), Pred /
// PredReg and dl (both taken from Op0). isT2 is set to true for the Thumb2
// opcodes and is otherwise left untouched, so the caller must initialise it.
//
// Returns false if the subtarget lacks v5TE, the opcode is unsupported, Op0
// does not have exactly one non-volatile, non-atomic memory operand, the
// alignment is below the subtarget's dual load/store requirement, the offset
// does not fit, or both ops use the same data register.
//
// NOTE(review): the declaration and negative-offset assignment of `AddSub`
// (embedded lines 2301 and 2303) are missing from this listing.
2253bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2254 MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2255 Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2256 Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2257 // Make sure we're allowed to generate LDRD/STRD.
2258 if (!STI->hasV5TEOps())
2259 return false;
2260
2261 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2262 unsigned Scale = 1;
2263 unsigned Opcode = Op0->getOpcode();
2264 if (Opcode == ARM::LDRi12) {
2265 NewOpc = ARM::LDRD;
2266 } else if (Opcode == ARM::STRi12) {
2267 NewOpc = ARM::STRD;
2268 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2269 NewOpc = ARM::t2LDRDi8;
2270 Scale = 4;
2271 isT2 = true;
2272 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2273 NewOpc = ARM::t2STRDi8;
2274 Scale = 4;
2275 isT2 = true;
2276 } else {
2277 return false;
2278 }
2279
2280 // Make sure the base address satisfies i64 ld / st alignment requirement.
2281 // At the moment, we ignore the memoryoperand's value.
2282 // If we want to use AliasAnalysis, we should check it accordingly.
 // Only Op0's memory operand is inspected here; Op1's is not looked at.
2283 if (!Op0->hasOneMemOperand() ||
2284 (*Op0->memoperands_begin())->isVolatile() ||
2285 (*Op0->memoperands_begin())->isAtomic())
2286 return false;
2287
2288 Align Alignment = (*Op0->memoperands_begin())->getAlign();
2289 Align ReqAlign = STI->getDualLoadStoreAlignment();
2290 if (Alignment < ReqAlign)
2291 return false;
2292
2293 // Then make sure the immediate offset fits.
2294 int OffImm = getMemoryOpOffset(*Op0);
2295 if (isT2) {
 // Thumb2: Scale is 4, so the offset must be a multiple of 4 strictly
 // between -1024 and 1024.
2296 int Limit = (1 << 8) * Scale;
2297 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2298 return false;
2299 Offset = OffImm;
2300 } else {
 // ARM mode: Scale is 1; the magnitude must be below 256 and the sign is
 // carried separately in AddSub before being packed with getAM3Opc().
2302 if (OffImm < 0) {
2304 OffImm = - OffImm;
2305 }
2306 int Limit = (1 << 8) * Scale;
2307 if (OffImm >= Limit || (OffImm & (Scale-1)))
2308 return false;
2309 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2310 }
2311 FirstReg = Op0->getOperand(0).getReg();
2312 SecondReg = Op1->getOperand(0).getReg();
2313 if (FirstReg == SecondReg)
2314 return false;
2315 BaseReg = Op0->getOperand(1).getReg();
2316 Pred = getInstrPredicate(*Op0, PredReg);
2317 dl = Op0->getDebugLoc();
2318 return true;
2319}
2320
// Try to move the loads (isLd) or stores in Ops, which all use register Base,
// next to each other so that they can be merged later, and form an LDRD /
// STRD immediately when exactly two ops qualify.
//
// Ops is sorted by descending offset and consumed from the back, i.e. from the
// lowest offset upwards. Each round collects a run of same-kind, same-size ops
// at contiguous offsets (at most InstReorderLimit of them), then either drops
// them from Ops (run too short, too far apart, or unsafe to move) or moves /
// pairs them. Registers defined by loads that are moved (not paired) get an
// entry in RegisterMap. Returns true if anything was moved or paired.
//
// NOTE(review): this listing has dropped two parameter lines (embedded lines
// 2322 and 2324, expected to declare `MBB`, `Ops`, `Base` and `RegisterMap`)
// and the declarations of `MemOps` (embedded line 2387) and `Pred` (embedded
// line 2416).
2321bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2323 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2325 bool RetVal = false;
2326
2327 // Sort by offset (in reverse order).
2328 llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2329 int LOffset = getMemoryOpOffset(*LHS);
2330 int ROffset = getMemoryOpOffset(*RHS);
2331 assert(LHS == RHS || LOffset != ROffset);
2332 return LOffset > ROffset;
2333 });
2334
2335 // The loads / stores of the same base are in order. Scan them from first to
2336 // last and check for the following:
2337 // 1. Any def of base.
2338 // 2. Any gaps.
2339 while (Ops.size() > 1) {
2340 unsigned FirstLoc = ~0U;
2341 unsigned LastLoc = 0;
2342 MachineInstr *FirstOp = nullptr;
2343 MachineInstr *LastOp = nullptr;
2344 int LastOffset = 0;
2345 unsigned LastOpcode = 0;
2346 unsigned LastBytes = 0;
2347 unsigned NumMove = 0;
 // Ops is sorted by descending offset, so the reverse walk visits
 // ascending offsets.
2348 for (MachineInstr *Op : llvm::reverse(Ops)) {
2349 // Make sure each operation has the same kind.
2350 unsigned LSMOpcode
2351 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2352 if (LastOpcode && LSMOpcode != LastOpcode)
2353 break;
2354
2355 // Check that we have a continuous set of offsets.
2356 int Offset = getMemoryOpOffset(*Op);
2357 unsigned Bytes = getLSMultipleTransferSize(Op);
2358 if (LastBytes) {
2359 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2360 break;
2361 }
2362
2363 // Don't try to reschedule too many instructions.
2364 if (NumMove == InstReorderLimit)
2365 break;
2366
2367 // Found a mergeable instruction; save information about it.
2368 ++NumMove;
2369 LastOffset = Offset;
2370 LastBytes = Bytes;
2371 LastOpcode = LSMOpcode;
2372
 // Track the earliest and latest op of the run by their position in
 // MI2LocMap.
2373 unsigned Loc = MI2LocMap[Op];
2374 if (Loc <= FirstLoc) {
2375 FirstLoc = Loc;
2376 FirstOp = Op;
2377 }
2378 if (Loc >= LastLoc) {
2379 LastLoc = Loc;
2380 LastOp = Op;
2381 }
2382 }
2383
2384 if (NumMove <= 1)
2385 Ops.pop_back();
2386 else {
2388 SmallSet<unsigned, 4> MemRegs;
2389 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2390 MemOps.insert(Ops[i]);
2391 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2392 }
2393
2394 // Be conservative, if the instructions are too far apart, don't
2395 // move them. We want to limit the increase of register pressure.
2396 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2397 if (DoMove)
2398 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2399 MemOps, MemRegs, TRI, AA);
2400 if (!DoMove) {
2401 for (unsigned i = 0; i != NumMove; ++i)
2402 Ops.pop_back();
2403 } else {
2404 // This is the new location for the loads / stores.
 // Loads are gathered at the earliest op, stores at the latest one; the
 // insertion point then skips past members of the run and debug
 // instructions.
2405 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2406 while (InsertPos != MBB->end() &&
2407 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2408 ++InsertPos;
2409
2410 // If we are moving a pair of loads / stores, see if it makes sense
2411 // to try to allocate a pair of registers that can form register pairs.
2412 MachineInstr *Op0 = Ops.back();
2413 MachineInstr *Op1 = Ops[Ops.size()-2];
2414 Register FirstReg, SecondReg;
2415 Register BaseReg, PredReg;
2417 bool isT2 = false;
2418 unsigned NewOpc = 0;
2419 int Offset = 0;
2420 DebugLoc dl;
2421 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2422 FirstReg, SecondReg, BaseReg,
2423 Offset, PredReg, Pred, isT2)) {
2424 Ops.pop_back();
2425 Ops.pop_back();
2426
 // Constrain both data registers to the class required by operand 0
 // of the new instruction.
2427 const MCInstrDesc &MCID = TII->get(NewOpc);
2428 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2429 MRI->constrainRegClass(FirstReg, TRC);
2430 MRI->constrainRegClass(SecondReg, TRC);
2431
2432 // Form the pair instruction.
2433 if (isLd) {
2434 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2435 .addReg(FirstReg, RegState::Define)
2436 .addReg(SecondReg, RegState::Define)
2437 .addReg(BaseReg);
2438 // FIXME: We're converting from LDRi12 to an insn that still
2439 // uses addrmode2, so we need an explicit offset reg. It should
2440 // always be reg0 since we're transforming LDRi12s.
2441 if (!isT2)
2442 MIB.addReg(0);
2443 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2444 MIB.cloneMergedMemRefs({Op0, Op1});
2445 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2446 ++NumLDRDFormed;
2447 } else {
2448 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2449 .addReg(FirstReg)
2450 .addReg(SecondReg)
2451 .addReg(BaseReg);
2452 // FIXME: We're converting from STRi12 to an insn that still
2453 // uses addrmode2, so we need an explicit offset reg. It should
2454 // always be reg0 since we're transforming STRi12s.
2455 if (!isT2)
2456 MIB.addReg(0);
2457 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2458 MIB.cloneMergedMemRefs({Op0, Op1});
2459 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2460 ++NumSTRDFormed;
2461 }
2462 MBB->erase(Op0);
2463 MBB->erase(Op1);
2464
2465 if (!isT2) {
2466 // Add register allocation hints to form register pairs.
2467 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2468 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2469 }
2470 } else {
2471 for (unsigned i = 0; i != NumMove; ++i) {
2472 MachineInstr *Op = Ops.pop_back_val();
2473 if (isLd) {
2474 // Populate RegisterMap with all Registers defined by loads.
 // operator[] is used only to create the (empty) entry.
2475 Register Reg = Op->getOperand(0).getReg();
2476 RegisterMap[Reg];
2477 }
2478
2479 MBB->splice(InsertPos, MBB, Op);
2480 }
2481 }
2482
2483 NumLdStMoved += NumMove;
2484 RetVal = true;
2485 }
2486 }
2487 }
2488
2489 return RetVal;
2490}
2491
// Invoke Fn on the register operands of the debug instruction MI that are
// treated as its debug operands: operand 0 for a non-list debug value,
// otherwise every register operand from index 2 onwards.
//
// NOTE(review): the first line of the signature (embedded line 2492) is
// missing from this listing; the name forEachDbgRegOperand and the `MI`
// parameter are taken from the call sites and the body.
2493 std::function<void(MachineOperand &)> Fn) {
2494 if (MI->isNonListDebugValue()) {
2495 auto &Op = MI->getOperand(0);
2496 if (Op.isReg())
2497 Fn(Op);
2498 } else {
2499 for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2500 auto &Op = MI->getOperand(I);
2501 if (Op.isReg())
2502 Fn(Op);
2503 }
2504 }
2505}
2506
2507// Update the RegisterMap with the instruction that was moved because a
2508// DBG_VALUE_LIST may need to be moved again.
//
// For every debug register operand of DbgValueListInstr that has an entry in
// RegisterMap, occurrences of InstrToReplace in that entry's instruction list
// are replaced with DbgValueListInstr.
//
// NOTE(review): the start of the signature (embedded lines 2509-2510) is
// missing from this listing; the function name and the `RegisterMap`
// parameter are taken from the call site and the body.
2511 MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2512
2513 forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2514 auto RegIt = RegisterMap.find(Op.getReg());
2515 if (RegIt == RegisterMap.end())
2516 return;
2517 auto &InstrVec = RegIt->getSecond();
2518 llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2519 });
2520}
2521
// Build the DebugVariable identifying what the debug instruction MI describes,
// from its debug variable, debug expression and the inlined-at location of its
// DebugLoc.
//
// NOTE(review): the signature (embedded line 2522) is missing from this
// listing; the name createDebugVariableFromMachineInstr is taken from the
// call site in RescheduleLoadStoreInstrs().
2523 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2524 MI->getDebugLoc()->getInlinedAt());
2525 return DbgVar;
2526}
2527
// Reschedule the unpredicated loads / stores of MBB so that accesses off the
// same base register end up adjacent, then re-position DBG_VALUEs that refer
// to loads which were moved. Returns true if RescheduleOps() changed anything.
//
// The block is processed in regions that end at a call, a terminator, or a
// memory op whose base + offset has already been seen in the region.
//
// NOTE(review): several declarations are missing from this listing (embedded
// lines 2532-2533, 2542, 2545-2546, 2568, 2772, 2775, 2792, 2797, 2820, 2826),
// among them `MI2LocMap`, `Base2InstMap`, `RegisterMap`, `MBBI`, `E`,
// `Offset`, `DbgValueSinkCandidates`, `InstrMap` and the `DbgVar` / `Var`
// locals used below.
2528bool
2529ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2530 bool RetVal = false;
2531
2534 using BaseVec = SmallVector<unsigned, 4>;
2535 Base2InstMap Base2LdsMap;
2536 Base2InstMap Base2StsMap;
2537 BaseVec LdBases;
2538 BaseVec StBases;
2539 // This map is used to track the relationship between the virtual
2540 // register that is the result of a load that is moved and the DBG_VALUE
2541 // MachineInstr pointer that uses that virtual register.
2543
2544 unsigned Loc = 0;
2547 while (MBBI != E) {
2548 for (; MBBI != E; ++MBBI) {
2549 MachineInstr &MI = *MBBI;
2550 if (MI.isCall() || MI.isTerminator()) {
2551 // Stop at barriers.
2552 ++MBBI;
2553 break;
2554 }
2555
 // Debug instructions do not get a location number.
2556 if (!MI.isDebugInstr())
2557 MI2LocMap[&MI] = ++Loc;
2558
2559 if (!isMemoryOp(MI))
2560 continue;
 // Only unpredicated memory ops are considered.
2561 Register PredReg;
2562 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2563 continue;
2564
2565 int Opc = MI.getOpcode();
2566 bool isLd = isLoadSingle(Opc);
2567 Register Base = MI.getOperand(1).getReg();
2569 bool StopHere = false;
 // Record MI under its base register. If an op with the same base and the
 // same offset is already recorded, set StopHere instead of recording it.
2570 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2571 auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2572 if (Inserted) {
2573 BI->second.push_back(&MI);
2574 Bases.push_back(Base);
2575 return;
2576 }
2577 for (const MachineInstr *MI : BI->second) {
2578 if (Offset == getMemoryOpOffset(*MI)) {
2579 StopHere = true;
2580 break;
2581 }
2582 }
2583 if (!StopHere)
2584 BI->second.push_back(&MI);
2585 };
2586
2587 if (isLd)
2588 FindBases(Base2LdsMap, LdBases);
2589 else
2590 FindBases(Base2StsMap, StBases);
2591
2592 if (StopHere) {
2593 // Found a duplicate (a base+offset combination that's seen earlier).
2594 // Backtrack.
 // MBBI is not advanced, so this instruction starts the next region.
2595 --Loc;
2596 break;
2597 }
2598 }
2599
2600 // Re-schedule loads.
2601 for (unsigned Base : LdBases) {
2602 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2603 if (Lds.size() > 1)
2604 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2605 }
2606
2607 // Re-schedule stores.
2608 for (unsigned Base : StBases) {
2609 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2610 if (Sts.size() > 1)
2611 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2612 }
2613
 // Start the next region with empty per-base bookkeeping.
2614 if (MBBI != E) {
2615 Base2LdsMap.clear();
2616 Base2StsMap.clear();
2617 LdBases.clear();
2618 StBases.clear();
2619 }
2620 }
2621
2622 // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2623 // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2624 // use-before-def, resulting in a loss of debug info.
2625
2626 // Example:
2627 // Before the Pre Register Allocation Load Store Pass
2628 // inst_a
2629 // %2 = ld ...
2630 // inst_b
2631 // DBG_VALUE %2, "x", ...
2632 // %3 = ld ...
2633
2634 // After the Pass:
2635 // inst_a
2636 // inst_b
2637 // DBG_VALUE %2, "x", ...
2638 // %2 = ld ...
2639 // %3 = ld ...
2640
2641 // The code below addresses this by moving the DBG_VALUE to the position
2642 // immediately after the load.
2643
2644 // Example:
2645 // After the code below:
2646 // inst_a
2647 // inst_b
2648 // %2 = ld ...
2649 // DBG_VALUE %2, "x", ...
2650 // %3 = ld ...
2651
2652 // The algorithm works in two phases: First RescheduleOps() populates the
2653 // RegisterMap with registers that were moved as keys, there is no value
2654 // inserted. In the next phase, every MachineInstr in a basic block is
2655 // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2656 // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2657 // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2658 // describes debug information for a variable that already exists in the
2659 // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2660 // be set to undef. If the current MachineInstr is a load that was moved,
2661 // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2662 // the load.
2663
2664 // To illustrate the above algorithm visually let's take this example.
2665
2666 // Before the Pre Register Allocation Load Store Pass:
2667 // %2 = ld ...
2668 // DBG_VALUE %2, A, .... # X
2669 // DBG_VALUE 0, A, ... # Y
2670 // %3 = ld ...
2671 // DBG_VALUE %3, A, ..., # Z
2672 // %4 = ld ...
2673
2674 // After Pre Register Allocation Load Store Pass:
2675 // DBG_VALUE %2, A, .... # X
2676 // DBG_VALUE 0, A, ... # Y
2677 // DBG_VALUE %3, A, ..., # Z
2678 // %2 = ld ...
2679 // %3 = ld ...
2680 // %4 = ld ...
2681
2682 // The algorithm below does the following:
2683
2684 // In the beginning, the RegisterMap will have been populated with the virtual
2685 // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2686 // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2687 // InstrMap {}
2688 // -> DBG_VALUE %2, A, .... # X
2689 // DBG_VALUE 0, A, ... # Y
2690 // DBG_VALUE %3, A, ..., # Z
2691 // %2 = ld ...
2692 // %3 = ld ...
2693 // %4 = ld ...
2694
2695 // After the first DBG_VALUE (denoted with an X) is processed, the
2696 // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2697 // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2698 // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2699 // DBG_VALUE %2, A, .... # X
2700 // -> DBG_VALUE 0, A, ... # Y
2701 // DBG_VALUE %3, A, ..., # Z
2702 // %2 = ld ...
2703 // %3 = ld ...
2704 // %4 = ld ...
2705
2706 // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2707 // to now hold Y for A and the RegisterMap is also updated to remove X from
2708 // %2, this is because both X and Y describe the same debug variable A. X is
2709 // also updated to have a $noreg as the first operand.
2710 // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2711 // InstrMap = {X-> 2}
2712 // DBG_VALUE $noreg, A, .... # X
2713 // DBG_VALUE 0, A, ... # Y
2714 // -> DBG_VALUE %3, A, ..., # Z
2715 // %2 = ld ...
2716 // %3 = ld ...
2717 // %4 = ld ...
2718
2719 // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2720 // hold Z for A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2721 // is updated to have Z mapped to %3. This is again because Z describes the
2722 // debug variable A, Y is not updated to have $noreg as first operand because
2723 // its first operand is an immediate, not a register.
2724 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2725 // InstrMap = {X -> 2, Z -> 3}
2726 // DBG_VALUE $noreg, A, .... # X
2727 // DBG_VALUE 0, A, ... # Y
2728 // DBG_VALUE %3, A, ..., # Z
2729 // -> %2 = ld ...
2730 // %3 = ld ...
2731 // %4 = ld ...
2732
2733 // Nothing happens here since the RegisterMap for %2 contains no value.
2734 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2735 // InstrMap = {X -> 2, Z -> 3}
2736 // DBG_VALUE $noreg, A, .... # X
2737 // DBG_VALUE 0, A, ... # Y
2738 // DBG_VALUE %3, A, ..., # Z
2739 // %2 = ld ...
2740 // -> %3 = ld ...
2741 // %4 = ld ...
2742
2743 // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2744 // pointer Z is copied to come after the load for %3 and the old Z's first
2745 // operand is changed to $noreg the Basic Block iterator is moved to after the
2746 // DBG_VALUE Z's new position.
2747 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2748 // InstrMap = {X -> 2, Z -> 3}
2749 // DBG_VALUE $noreg, A, .... # X
2750 // DBG_VALUE 0, A, ... # Y
2751 // DBG_VALUE $noreg, A, ..., # Old Z
2752 // %2 = ld ...
2753 // %3 = ld ...
2754 // DBG_VALUE %3, A, ..., # Z
2755 // -> %4 = ld ...
2756
2757 // Nothing happens for %4 and the algorithm exits having processed the entire
2758 // Basic Block.
2759 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2760 // InstrMap = {X -> 2, Z -> 3}
2761 // DBG_VALUE $noreg, A, .... # X
2762 // DBG_VALUE 0, A, ... # Y
2763 // DBG_VALUE $noreg, A, ..., # Old Z
2764 // %2 = ld ...
2765 // %3 = ld ...
2766 // DBG_VALUE %3, A, ..., # Z
2767 // %4 = ld ...
2768
2769 // This map is used to track the relationship between
2770 // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2771 // debug information for that Debug Variable.
2773 // This map is used to track the relationship between a DBG_VALUE or
2774 // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2776 for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2777 MachineInstr &MI = *MBBI;
2778
 // If Reg is tracked in RegisterMap, record MI against it in both maps.
2779 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2780 auto RegIt = RegisterMap.find(Reg);
2781 if (RegIt == RegisterMap.end())
2782 return;
2783 auto &InstrVec = RegIt->getSecond();
2784 InstrVec.push_back(&MI);
2785 InstrMap[&MI].push_back(Reg);
2786 };
2787
2788 if (MI.isDebugValue()) {
2789 assert(MI.getDebugVariable() &&
2790 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2791
2793 // If the first operand is a register and it exists in the RegisterMap, we
2794 // know this is a DBG_VALUE that uses the result of a load that was moved,
2795 // and is therefore a candidate to also be moved, add it to the
2796 // RegisterMap and InstrMap.
2798 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2799 });
2800
2801 // If the current DBG_VALUE describes the same variable as one of the
2802 // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2803 // undef. Moving one DBG_VALUE past another would result in the variable's
2804 // value going back in time when stepping through the block in the
2805 // debugger.
2806 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2807 if (InstrIt != DbgValueSinkCandidates.end()) {
2808 auto *Instr = InstrIt->getSecond();
2809 auto RegIt = InstrMap.find(Instr);
2810 if (RegIt != InstrMap.end()) {
2811 const auto &RegVec = RegIt->getSecond();
2812 // For every Register in the RegVec, remove the MachineInstr in the
2813 // RegisterMap that describes the DbgVar.
2814 for (auto &Reg : RegVec) {
2815 auto RegIt = RegisterMap.find(Reg);
2816 if (RegIt == RegisterMap.end())
2817 continue;
2818 auto &InstrVec = RegIt->getSecond();
2819 auto IsDbgVar = [&](MachineInstr *I) -> bool {
2821 return Var == DbgVar;
2822 };
2823
2824 llvm::erase_if(InstrVec, IsDbgVar);
2825 }
2827 [&](MachineOperand &Op) { Op.setReg(0); });
2828 }
2829 }
 // MI is now the most recent DBG_VALUE seen for this variable.
2830 DbgValueSinkCandidates[DbgVar] = &MI;
2831 } else {
2832 // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2833 // then move that DBG_VALUE to below the load.
2834 auto Opc = MI.getOpcode();
2835 if (!isLoadSingle(Opc))
2836 continue;
2837 auto Reg = MI.getOperand(0).getReg();
2838 auto RegIt = RegisterMap.find(Reg);
2839 if (RegIt == RegisterMap.end())
2840 continue;
2841 auto &DbgInstrVec = RegIt->getSecond();
2842 if (!DbgInstrVec.size())
2843 continue;
2844 for (auto *DbgInstr : DbgInstrVec) {
 // Insert a clone right after the current position and step onto it, so
 // further clones are placed after this one.
2845 MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2846 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2847 MBB->insert(InsertPos, ClonedMI);
2848 MBBI++;
2849 // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2850 // that was moved.
2851 auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2852 auto DbgIt = DbgValueSinkCandidates.find(DbgVar);
2853 // If the instruction is a DBG_VALUE_LIST, it may have already been
2854 // erased from the DbgValueSinkCandidates. Only erase if it exists in
2855 // the DbgValueSinkCandidates.
2856 if (DbgIt != DbgValueSinkCandidates.end())
2857 DbgValueSinkCandidates.erase(DbgIt);
2858 // Zero out original dbg instr
2859 forEachDbgRegOperand(DbgInstr,
2860 [&](MachineOperand &Op) { Op.setReg(0); });
2861 // Update RegisterMap with ClonedMI because it might have to be moved
2862 // again.
2863 if (DbgInstr->isDebugValueList())
2864 updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2865 DbgInstr);
2866 }
2867 }
2868 }
2869 return RetVal;
2870}
2871
2872// Get the Base register operand index from the memory access MachineInstr if we
2873// should attempt to distribute postinc on it. Return -1 if not of a valid
2874// instruction type. If it returns an index, it is assumed that instruction is an
2875// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
//
// The plain (non-writeback) opcodes listed below return 1; the `_post` and
// `_pre` MVE opcodes return 2.
//
// NOTE(review): the signature (embedded line 2876) is missing from this
// listing; the body only shows that it takes a MachineInstr named `MI`.
2877 switch (MI.getOpcode()) {
2878 case ARM::MVE_VLDRBS16:
2879 case ARM::MVE_VLDRBS32:
2880 case ARM::MVE_VLDRBU16:
2881 case ARM::MVE_VLDRBU32:
2882 case ARM::MVE_VLDRHS32:
2883 case ARM::MVE_VLDRHU32:
2884 case ARM::MVE_VLDRBU8:
2885 case ARM::MVE_VLDRHU16:
2886 case ARM::MVE_VLDRWU32:
2887 case ARM::MVE_VSTRB16:
2888 case ARM::MVE_VSTRB32:
2889 case ARM::MVE_VSTRH32:
2890 case ARM::MVE_VSTRBU8:
2891 case ARM::MVE_VSTRHU16:
2892 case ARM::MVE_VSTRWU32:
2893 case ARM::t2LDRHi8:
2894 case ARM::t2LDRHi12:
2895 case ARM::t2LDRSHi8:
2896 case ARM::t2LDRSHi12:
2897 case ARM::t2LDRBi8:
2898 case ARM::t2LDRBi12:
2899 case ARM::t2LDRSBi8:
2900 case ARM::t2LDRSBi12:
2901 case ARM::t2STRBi8:
2902 case ARM::t2STRBi12:
2903 case ARM::t2STRHi8:
2904 case ARM::t2STRHi12:
2905 return 1;
2906 case ARM::MVE_VLDRBS16_post:
2907 case ARM::MVE_VLDRBS32_post:
2908 case ARM::MVE_VLDRBU16_post:
2909 case ARM::MVE_VLDRBU32_post:
2910 case ARM::MVE_VLDRHS32_post:
2911 case ARM::MVE_VLDRHU32_post:
2912 case ARM::MVE_VLDRBU8_post:
2913 case ARM::MVE_VLDRHU16_post:
2914 case ARM::MVE_VLDRWU32_post:
2915 case ARM::MVE_VSTRB16_post:
2916 case ARM::MVE_VSTRB32_post:
2917 case ARM::MVE_VSTRH32_post:
2918 case ARM::MVE_VSTRBU8_post:
2919 case ARM::MVE_VSTRHU16_post:
2920 case ARM::MVE_VSTRWU32_post:
2921 case ARM::MVE_VLDRBS16_pre:
2922 case ARM::MVE_VLDRBS32_pre:
2923 case ARM::MVE_VLDRBU16_pre:
2924 case ARM::MVE_VLDRBU32_pre:
2925 case ARM::MVE_VLDRHS32_pre:
2926 case ARM::MVE_VLDRHU32_pre:
2927 case ARM::MVE_VLDRBU8_pre:
2928 case ARM::MVE_VLDRHU16_pre:
2929 case ARM::MVE_VLDRWU32_pre:
2930 case ARM::MVE_VSTRB16_pre:
2931 case ARM::MVE_VSTRB32_pre:
2932 case ARM::MVE_VSTRH32_pre:
2933 case ARM::MVE_VSTRBU8_pre:
2934 case ARM::MVE_VSTRHU16_pre:
2935 case ARM::MVE_VSTRWU32_pre:
2936 return 2;
2937 }
2938 return -1;
2939}
2940
// Returns true if MI's opcode is one of the MVE_VLDR*/MVE_VSTR* `_post`
// opcodes listed below, and false for every other opcode.
// NOTE(review): the function header (listing line 2941) is absent from this
// listing; the declaration index at the end of the file gives it as
// `static bool isPostIndex(MachineInstr &MI)`.
2942 switch (MI.getOpcode()) {
2943 case ARM::MVE_VLDRBS16_post:
2944 case ARM::MVE_VLDRBS32_post:
2945 case ARM::MVE_VLDRBU16_post:
2946 case ARM::MVE_VLDRBU32_post:
2947 case ARM::MVE_VLDRHS32_post:
2948 case ARM::MVE_VLDRHU32_post:
2949 case ARM::MVE_VLDRBU8_post:
2950 case ARM::MVE_VLDRHU16_post:
2951 case ARM::MVE_VLDRWU32_post:
2952 case ARM::MVE_VSTRB16_post:
2953 case ARM::MVE_VSTRB32_post:
2954 case ARM::MVE_VSTRH32_post:
2955 case ARM::MVE_VSTRBU8_post:
2956 case ARM::MVE_VSTRHU16_post:
2957 case ARM::MVE_VSTRWU32_post:
2958 return true;
2959 }
2960 return false;
2961}
2962
// Returns true if MI's opcode is one of the MVE_VLDR*/MVE_VSTR* `_pre`
// opcodes listed below, and false for every other opcode.
// NOTE(review): the function header (listing line 2963) is absent from this
// listing; the declaration index at the end of the file gives it as
// `static bool isPreIndex(MachineInstr &MI)`.
2964 switch (MI.getOpcode()) {
2965 case ARM::MVE_VLDRBS16_pre:
2966 case ARM::MVE_VLDRBS32_pre:
2967 case ARM::MVE_VLDRBU16_pre:
2968 case ARM::MVE_VLDRBU32_pre:
2969 case ARM::MVE_VLDRHS32_pre:
2970 case ARM::MVE_VLDRHU32_pre:
2971 case ARM::MVE_VLDRBU8_pre:
2972 case ARM::MVE_VLDRHU16_pre:
2973 case ARM::MVE_VLDRWU32_pre:
2974 case ARM::MVE_VSTRB16_pre:
2975 case ARM::MVE_VSTRB32_pre:
2976 case ARM::MVE_VSTRH32_pre:
2977 case ARM::MVE_VSTRBU8_pre:
2978 case ARM::MVE_VSTRHU16_pre:
2979 case ARM::MVE_VSTRWU32_pre:
2980 return true;
2981 }
2982 return false;
2983}
2984
2985// Given a memory access Opcode, check that the given Imm would be a valid Offset
2986// for this instruction (same as isLegalAddressImm), or if the instruction
2987// could be easily converted to one where that was valid. For example converting
2988// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
2989// AdjustBaseAndOffset below.
//
// CodesizeEstimate is an in/out counter: it is left untouched when Imm is
// already legal, and bumped by one when the convertible addressing-mode case
// is reached (before the range check, so also when that check then fails).
2990static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
2991 const TargetInstrInfo *TII,
2992 int &CodesizeEstimate) {
// Fast path: the immediate is already encodable for this opcode.
2993 if (isLegalAddressImm(Opcode, Imm, TII))
2994 return true;
2995
2996 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
2997 const MCInstrDesc &Desc = TII->get(Opcode);
2998 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2999 switch (AddrMode) {
// NOTE(review): the `case` label (listing line 3000) is absent from this
// listing; per the comment above it is presumably the AddrModeT2_i12 case -
// confirm against the original file.
3001 CodesizeEstimate += 1;
// Convertible only for strictly negative offsets of magnitude below 256,
// i.e. Imm in [-255, -1].
3002 return Imm < 0 && -Imm < ((1 << 8) * 1);
3003 }
3004 return false;
3005}
3006
3007// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
3008// by -Offset. This can either happen in-place or be a replacement as MI is
3009// converted to another instruction type.
//
// On the replacement path a new instruction is built immediately before MI and
// MI is then erased, so the MI pointer passed in must not be used afterwards.
// NOTE(review): the first line of the function header (listing line 3010) is
// absent from this listing; the declaration index at the end of the file gives
// the full signature as `static void AdjustBaseAndOffset(MachineInstr *MI,
// Register NewBaseReg, int Offset, const TargetInstrInfo *TII,
// const TargetRegisterInfo *TRI)`.
3011 int Offset, const TargetInstrInfo *TII,
3012 const TargetRegisterInfo *TRI) {
3013 // Set the Base reg
3014 unsigned BaseOp = getBaseOperandIndex(*MI);
3015 MI->getOperand(BaseOp).setReg(NewBaseReg);
3016 // and constrain the reg class to that required by the instruction.
3017 MachineFunction *MF = MI->getMF();
// NOTE(review): listing line 3018 is absent here; `MRI` used below is
// presumably declared on it - confirm against the original file.
3019 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3020 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
3021 MRI.constrainRegClass(NewBaseReg, TRC);
3022
// The offset operand sits directly after the base operand.
3023 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
// In-place path: the adjusted offset is still encodable for this opcode.
3024 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3025 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3026 else {
// Replacement path: only the i12 byte/halfword loads and stores below have
// an i8 counterpart to switch to; any other opcode is treated as unreachable.
3027 unsigned ConvOpcode;
3028 switch (MI->getOpcode()) {
3029 case ARM::t2LDRHi12:
3030 ConvOpcode = ARM::t2LDRHi8;
3031 break;
3032 case ARM::t2LDRSHi12:
3033 ConvOpcode = ARM::t2LDRSHi8;
3034 break;
3035 case ARM::t2LDRBi12:
3036 ConvOpcode = ARM::t2LDRBi8;
3037 break;
3038 case ARM::t2LDRSBi12:
3039 ConvOpcode = ARM::t2LDRSBi8;
3040 break;
3041 case ARM::t2STRHi12:
3042 ConvOpcode = ARM::t2STRHi8;
3043 break;
3044 case ARM::t2STRBi12:
3045 ConvOpcode = ARM::t2STRBi8;
3046 break;
3047 default:
3048 llvm_unreachable("Unhandled convertable opcode");
3049 }
3050 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3051 "Illegal Address Immediate after convert!");
3052
// Rebuild the access with the converted opcode: operands 0, 1, 3 and 4 are
// copied from MI (operand 1 already holds NewBaseReg, set above), the offset
// is replaced, and the memory operands are cloned.
3053 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3054 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3055 .add(MI->getOperand(0))
3056 .add(MI->getOperand(1))
3057 .addImm(OldOffset - Offset)
3058 .add(MI->getOperand(3))
3059 .add(MI->getOperand(4))
3060 .cloneMemRefs(*MI);
// The original instruction is now dead; remove it.
3061 MI->eraseFromParent();
3062 }
3063}
3064
// Builds, immediately before MI, the post-indexed counterpart of MI that
// writes the updated base into NewReg and uses Offset as the increment, and
// returns the new instruction. MI itself is not erased here.
// NOTE(review): the first line of the function header (listing line 3065) is
// absent from this listing; the declaration index at the end of the file gives
// the full signature as `static MachineInstr *createPostIncLoadStore(
// MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII,
// const TargetRegisterInfo *TRI)`.
3066 Register NewReg,
3067 const TargetInstrInfo *TII,
3068 const TargetRegisterInfo *TRI) {
3069 MachineFunction *MF = MI->getMF();
// NOTE(review): listing line 3070 is absent here; `MRI` used below is
// presumably declared on it - confirm against the original file.
3071
// A positive Offset selects the add form of the post-indexed opcode; zero or
// negative selects the sub form.
3072 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3073 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3074
3075 const MCInstrDesc &MCID = TII->get(NewOpcode);
3076 // Constrain the def register class
3077 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
3078 MRI.constrainRegClass(NewReg, TRC);
3079 // And do the same for the base operand
// (operand 2 of the new instruction, operand 1 of the old one).
3080 TRC = TII->getRegClass(MCID, 2, TRI, *MF);
3081 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3082
3083 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3084 switch (AddrMode) {
// NOTE(review): the `case` labels for this first group (listing lines
// 3085-3087) are absent from this listing.
3088 // Any MVE load/store
// Operand order here: NewReg def first, then MI's operands 0 and 1, the new
// offset, and MI's operands 3, 4 and 5.
3089 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3090 .addReg(NewReg, RegState::Define)
3091 .add(MI->getOperand(0))
3092 .add(MI->getOperand(1))
3093 .addImm(Offset)
3094 .add(MI->getOperand(3))
3095 .add(MI->getOperand(4))
3096 .add(MI->getOperand(5))
3097 .cloneMemRefs(*MI);
// NOTE(review): the `case` label for the second group (listing line 3098) is
// absent from this listing.
3099 if (MI->mayLoad()) {
// Loads: MI's operand 0 comes first, followed by the NewReg def.
3100 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3101 .add(MI->getOperand(0))
3102 .addReg(NewReg, RegState::Define)
3103 .add(MI->getOperand(1))
3104 .addImm(Offset)
3105 .add(MI->getOperand(3))
3106 .add(MI->getOperand(4))
3107 .cloneMemRefs(*MI);
3108 } else {
// Stores: the NewReg def comes first, followed by MI's operand 0.
3109 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3110 .addReg(NewReg, RegState::Define)
3111 .add(MI->getOperand(0))
3112 .add(MI->getOperand(1))
3113 .addImm(Offset)
3114 .add(MI->getOperand(3))
3115 .add(MI->getOperand(4))
3116 .cloneMemRefs(*MI);
3117 }
3118 default:
3119 llvm_unreachable("Unhandled createPostIncLoadStore");
3120 }
3121}
3122
3123// Given a Base Register, optimise the load/store uses to attempt to create more
3124// post-inc accesses and less register moves. We do this by taking zero offset
3125// loads/stores with an add, and convert them to a postinc load/store of the
3126// same type. Any subsequent accesses will be adjusted to use and account for
3127// the post-inc value.
3128// For example:
3129// LDR #0 LDR_POSTINC #16
3130// LDR #4 LDR #-12
3131// LDR #8 LDR #-8
3132// LDR #12 LDR #-4
3133// ADD #16
3134//
3135// At the same time if we do not find an increment but do find an existing
3136// pre/post inc instruction, we can still adjust the offsets of subsequent
3137// instructions to save the register move that would otherwise be needed for the
3138// in-place increment.
3139bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3140 // We are looking for:
3141 // One zero offset load/store that can become postinc
3142 MachineInstr *BaseAccess = nullptr;
3143 MachineInstr *PrePostInc = nullptr;
3144 // An increment that can be folded in
3145 MachineInstr *Increment = nullptr;
3146 // Other accesses after BaseAccess that will need to be updated to use the
3147 // postinc value.
3148 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3149 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3150 if (!Increment && getAddSubImmediate(Use) != 0) {
3151 Increment = &Use;
3152 continue;
3153 }
3154
3155 int BaseOp = getBaseOperandIndex(Use);
3156 if (BaseOp == -1)
3157 return false;
3158
3159 if (!Use.getOperand(BaseOp).isReg() ||
3160 Use.getOperand(BaseOp).getReg() != Base)
3161 return false;
3162 if (isPreIndex(Use) || isPostIndex(Use))
3163 PrePostInc = &Use;
3164 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3165 BaseAccess = &Use;
3166 else
3167 OtherAccesses.insert(&Use);
3168 }
3169
3170 int IncrementOffset;
3171 Register NewBaseReg;
3172 if (BaseAccess && Increment) {
3173 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3174 return false;
3175 Register PredReg;
3176 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3177 getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
3178 return false;
3179
3180 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3181 << Base.virtRegIndex() << "\n");
3182
3183 // Make sure that Increment has no uses before BaseAccess that are not PHI
3184 // uses.
3185 for (MachineInstr &Use :
3186 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3187 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3188 !DT->dominates(BaseAccess, &Use))) {
3189 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3190 return false;
3191 }
3192 }
3193
3194 // Make sure that Increment can be folded into Base
3195 IncrementOffset = getAddSubImmediate(*Increment);
3196 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3197 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3198 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3199 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3200 return false;
3201 }
3202 }
3203 else if (PrePostInc) {
3204 // If we already have a pre/post index load/store then set BaseAccess,
3205 // IncrementOffset and NewBaseReg to the values it already produces,
3206 // allowing us to update and subsequent uses of BaseOp reg with the
3207 // incremented value.
3208 if (Increment)
3209 return false;
3210
3211 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3212 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3213 int BaseOp = getBaseOperandIndex(*PrePostInc);
3214 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3215 BaseAccess = PrePostInc;
3216 NewBaseReg = PrePostInc->getOperand(0).getReg();
3217 }
3218 else
3219 return false;
3220
3221 // And make sure that the negative value of increment can be added to all
3222 // other offsets after the BaseAccess. We rely on either
3223 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3224 // to keep things simple.
3225 // This also adds a simple codesize metric, to detect if an instruction (like
3226 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3227 // cannot because it is converted to something else (t2LDRBi8). We start this
3228 // at -1 for the gain from removing the increment.
3229 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3230 int CodesizeEstimate = -1;
3231 for (auto *Use : OtherAccesses) {
3232 if (DT->dominates(BaseAccess, Use)) {
3233 SuccessorAccesses.insert(Use);
3234 unsigned BaseOp = getBaseOperandIndex(*Use);
3235 if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
3236 Use->getOperand(BaseOp + 1).getImm() -
3237 IncrementOffset,
3238 TII, CodesizeEstimate)) {
3239 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3240 return false;
3241 }
3242 } else if (!DT->dominates(Use, BaseAccess)) {
3243 LLVM_DEBUG(
3244 dbgs() << " Unknown dominance relation between Base and Use\n");
3245 return false;
3246 }
3247 }
3248 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3249 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3250 return false;
3251 }
3252
3253 if (!PrePostInc) {
3254 // Replace BaseAccess with a post inc
3255 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3256 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3257 NewBaseReg = Increment->getOperand(0).getReg();
3258 MachineInstr *BaseAccessPost =
3259 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3260 BaseAccess->eraseFromParent();
3261 Increment->eraseFromParent();
3262 (void)BaseAccessPost;
3263 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3264 }
3265
3266 for (auto *Use : SuccessorAccesses) {
3267 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3268 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3269 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3270 }
3271
3272 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3273 // remain.
3274 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3275 Op.setIsKill(false);
3276 return true;
3277}
3278
// Runs DistributeIncrements(Register) once for every virtual register that is
// used as the base operand of an access recognised by getBaseOperandIndex().
// Returns true if any of those per-register runs reported a change.
3279bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3280 bool Changed = false;
// NOTE(review): listing line 3281 is absent here; the `Visited` container used
// below is presumably declared on it - confirm against the original file.
// First pass: collect candidate base registers without modifying anything.
3282 for (auto &MBB : *MF) {
3283 for (auto &MI : MBB) {
3284 int BaseOp = getBaseOperandIndex(MI);
3285 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3286 continue;
3287
3288 Register Base = MI.getOperand(BaseOp).getReg();
// Only virtual registers are considered.
3289 if (!Base.isVirtual())
3290 continue;
3291
3292 Visited.insert(Base);
3293 }
3294 }
3295
// Second pass: rewrite per register. Presumably kept separate from the scan
// because the per-register rewrite erases instructions.
3296 for (auto Base : Visited)
3297 Changed |= DistributeIncrements(Base);
3298
3299 return Changed;
3300}
3301
3302/// Returns an instance of the load / store optimization pass.
/// When PreAlloc is true a new ARMPreAllocLoadStoreOpt is returned, otherwise
/// a new ARMLoadStoreOpt.
// NOTE(review): the function header (listing line 3303) is absent from this
// listing; the declaration index at the end of the file shows the declaration
// `FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc=false)`.
3304 if (PreAlloc)
3305 return new ARMPreAllocLoadStoreOpt();
3306 return new ARMLoadStoreOpt();
3307}
unsigned const MachineRegisterInfo * MRI
aarch64 promote const
static bool isLoadSingle(unsigned Opc)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static bool ContainsReg(const ArrayRef< std::pair< unsigned, bool > > &Regs, unsigned Reg)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static int getMemoryOpOffset(const MachineInstr &MI)
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
arm prera ldst opt
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:500
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:242
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:578
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:821
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
size_type size() const
Definition: SmallSet.h:170
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:389
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5304
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Define
Register definition.
@ Kill
The last use of a register.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1529
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
unsigned getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
Definition: VE.h:376
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1866
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
int getAddSubImmediate(MachineInstr &MI)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.