LLVM 23.0.0git
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a pass that performs load / store related peephole
10/// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMISelLowering.h"
19#include "ARMSubtarget.h"
22#include "Utils/ARMBaseInfo.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/Type.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
60#include "llvm/Support/Debug.h"
63#include <cassert>
64#include <cstddef>
65#include <cstdlib>
66#include <iterator>
67#include <limits>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");
75STATISTIC(NumSTMGened , "Number of stm instructions generated");
76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
86/// This switch disables formation of double/multi instructions that could
87/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
88/// disabled. This can be used to create libraries that are robust even when
89/// users provoke undefined behaviour by supplying misaligned pointers.
90/// \see mayCombineMisaligned()
91static cl::opt<bool>
92AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
94
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
97namespace {
98
99/// Post- register allocation pass the combine load / store instructions to
100/// form ldm / stm instructions.
101struct ARMLoadStoreOpt : public MachineFunctionPass {
102 static char ID;
103
104 const MachineFunction *MF;
105 const TargetInstrInfo *TII;
106 const TargetRegisterInfo *TRI;
107 const ARMSubtarget *STI;
108 const TargetLowering *TL;
109 ARMFunctionInfo *AFI;
111 RegisterClassInfo RegClassInfo;
113 bool LiveRegsValid;
114 bool RegClassInfoValid;
115 bool isThumb1, isThumb2;
116
117 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
118
119 bool runOnMachineFunction(MachineFunction &Fn) override;
120
121 MachineFunctionProperties getRequiredProperties() const override {
122 return MachineFunctionProperties().setNoVRegs();
123 }
124
125 StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
126
127private:
128 /// A set of load/store MachineInstrs with same base register sorted by
129 /// offset.
130 struct MemOpQueueEntry {
132 int Offset; ///< Load/Store offset.
133 unsigned Position; ///< Position as counted from end of basic block.
134
135 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
136 : MI(&MI), Offset(Offset), Position(Position) {}
137 };
138 using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
139
140 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
141 /// merged into a LDM/STM.
142 struct MergeCandidate {
143 /// List of instructions ordered by load/store offset.
145
146 /// Index in Instrs of the instruction being latest in the schedule.
147 unsigned LatestMIIdx;
148
149 /// Index in Instrs of the instruction being earliest in the schedule.
150 unsigned EarliestMIIdx;
151
152 /// Index into the basic block where the merged instruction will be
153 /// inserted. (See MemOpQueueEntry.Position)
154 unsigned InsertPos;
155
156 /// Whether the instructions can be merged into a ldm/stm instruction.
157 bool CanMergeToLSMulti;
158
159 /// Whether the instructions can be merged into a ldrd/strd instruction.
160 bool CanMergeToLSDouble;
161 };
164 SmallVector<MachineInstr *, 4> MergeBaseCandidates;
165
166 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
168 unsigned findFreeReg(const TargetRegisterClass &RegClass);
169 void UpdateBaseRegUses(MachineBasicBlock &MBB,
171 unsigned Base, unsigned WordOffset,
172 ARMCC::CondCodes Pred, unsigned PredReg);
173 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
174 MachineBasicBlock::iterator InsertBefore,
175 int Offset, unsigned Base, bool BaseKill,
176 unsigned Opcode, ARMCC::CondCodes Pred,
177 unsigned PredReg, const DebugLoc &DL,
178 ArrayRef<std::pair<unsigned, bool>> Regs,
180 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
181 MachineBasicBlock::iterator InsertBefore,
182 int Offset, unsigned Base, bool BaseKill,
183 unsigned Opcode, ARMCC::CondCodes Pred,
184 unsigned PredReg, const DebugLoc &DL,
185 ArrayRef<std::pair<unsigned, bool>> Regs,
186 ArrayRef<MachineInstr *> Instrs) const;
187 void FormCandidates(const MemOpQueue &MemOps);
188 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
189 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
191 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
192 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
193 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
194 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
195 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
196 bool CombineMovBx(MachineBasicBlock &MBB);
197};
198
199} // end anonymous namespace
200
201char ARMLoadStoreOpt::ID = 0;
202
203INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
204 false)
205
206static bool definesCPSR(const MachineInstr &MI) {
207 for (const auto &MO : MI.operands()) {
208 if (!MO.isReg())
209 continue;
210 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
211 // If the instruction has live CPSR def, then it's not safe to fold it
212 // into load / store.
213 return true;
214 }
215
216 return false;
217}
218
220 unsigned Opcode = MI.getOpcode();
221 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
222 unsigned NumOperands = MI.getDesc().getNumOperands();
223 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
224
225 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
226 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
227 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
228 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
229 return OffField;
230
231 // Thumb1 immediate offsets are scaled by 4
232 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
233 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
234 return OffField * 4;
235
236 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
237 : ARM_AM::getAM5Offset(OffField) * 4;
238 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
239 : ARM_AM::getAM5Op(OffField);
240
241 if (Op == ARM_AM::sub)
242 return -Offset;
243
244 return Offset;
245}
246
248 return MI.getOperand(1);
249}
250
252 return MI.getOperand(0);
253}
254
256 switch (Opcode) {
257 default: llvm_unreachable("Unhandled opcode!");
258 case ARM::LDRi12:
259 ++NumLDMGened;
260 switch (Mode) {
261 default: llvm_unreachable("Unhandled submode!");
262 case ARM_AM::ia: return ARM::LDMIA;
263 case ARM_AM::da: return ARM::LDMDA;
264 case ARM_AM::db: return ARM::LDMDB;
265 case ARM_AM::ib: return ARM::LDMIB;
266 }
267 case ARM::STRi12:
268 ++NumSTMGened;
269 switch (Mode) {
270 default: llvm_unreachable("Unhandled submode!");
271 case ARM_AM::ia: return ARM::STMIA;
272 case ARM_AM::da: return ARM::STMDA;
273 case ARM_AM::db: return ARM::STMDB;
274 case ARM_AM::ib: return ARM::STMIB;
275 }
276 case ARM::tLDRi:
277 case ARM::tLDRspi:
278 // tLDMIA is writeback-only - unless the base register is in the input
279 // reglist.
280 ++NumLDMGened;
281 switch (Mode) {
282 default: llvm_unreachable("Unhandled submode!");
283 case ARM_AM::ia: return ARM::tLDMIA;
284 }
285 case ARM::tSTRi:
286 case ARM::tSTRspi:
287 // There is no non-writeback tSTMIA either.
288 ++NumSTMGened;
289 switch (Mode) {
290 default: llvm_unreachable("Unhandled submode!");
291 case ARM_AM::ia: return ARM::tSTMIA_UPD;
292 }
293 case ARM::t2LDRi8:
294 case ARM::t2LDRi12:
295 ++NumLDMGened;
296 switch (Mode) {
297 default: llvm_unreachable("Unhandled submode!");
298 case ARM_AM::ia: return ARM::t2LDMIA;
299 case ARM_AM::db: return ARM::t2LDMDB;
300 }
301 case ARM::t2STRi8:
302 case ARM::t2STRi12:
303 ++NumSTMGened;
304 switch (Mode) {
305 default: llvm_unreachable("Unhandled submode!");
306 case ARM_AM::ia: return ARM::t2STMIA;
307 case ARM_AM::db: return ARM::t2STMDB;
308 }
309 case ARM::VLDRS:
310 ++NumVLDMGened;
311 switch (Mode) {
312 default: llvm_unreachable("Unhandled submode!");
313 case ARM_AM::ia: return ARM::VLDMSIA;
314 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
315 }
316 case ARM::VSTRS:
317 ++NumVSTMGened;
318 switch (Mode) {
319 default: llvm_unreachable("Unhandled submode!");
320 case ARM_AM::ia: return ARM::VSTMSIA;
321 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
322 }
323 case ARM::VLDRD:
324 ++NumVLDMGened;
325 switch (Mode) {
326 default: llvm_unreachable("Unhandled submode!");
327 case ARM_AM::ia: return ARM::VLDMDIA;
328 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
329 }
330 case ARM::VSTRD:
331 ++NumVSTMGened;
332 switch (Mode) {
333 default: llvm_unreachable("Unhandled submode!");
334 case ARM_AM::ia: return ARM::VSTMDIA;
335 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
336 }
337 }
338}
339
341 switch (Opcode) {
342 default: llvm_unreachable("Unhandled opcode!");
343 case ARM::LDMIA_RET:
344 case ARM::LDMIA:
345 case ARM::LDMIA_UPD:
346 case ARM::STMIA:
347 case ARM::STMIA_UPD:
348 case ARM::tLDMIA:
349 case ARM::tLDMIA_UPD:
350 case ARM::tSTMIA_UPD:
351 case ARM::t2LDMIA_RET:
352 case ARM::t2LDMIA:
353 case ARM::t2LDMIA_UPD:
354 case ARM::t2STMIA:
355 case ARM::t2STMIA_UPD:
356 case ARM::VLDMSIA:
357 case ARM::VLDMSIA_UPD:
358 case ARM::VSTMSIA:
359 case ARM::VSTMSIA_UPD:
360 case ARM::VLDMDIA:
361 case ARM::VLDMDIA_UPD:
362 case ARM::VSTMDIA:
363 case ARM::VSTMDIA_UPD:
364 return ARM_AM::ia;
365
366 case ARM::LDMDA:
367 case ARM::LDMDA_UPD:
368 case ARM::STMDA:
369 case ARM::STMDA_UPD:
370 return ARM_AM::da;
371
372 case ARM::LDMDB:
373 case ARM::LDMDB_UPD:
374 case ARM::STMDB:
375 case ARM::STMDB_UPD:
376 case ARM::t2LDMDB:
377 case ARM::t2LDMDB_UPD:
378 case ARM::t2STMDB:
379 case ARM::t2STMDB_UPD:
380 case ARM::VLDMSDB_UPD:
381 case ARM::VSTMSDB_UPD:
382 case ARM::VLDMDDB_UPD:
383 case ARM::VSTMDDB_UPD:
384 return ARM_AM::db;
385
386 case ARM::LDMIB:
387 case ARM::LDMIB_UPD:
388 case ARM::STMIB:
389 case ARM::STMIB_UPD:
390 return ARM_AM::ib;
391 }
392}
393
394static bool isT1i32Load(unsigned Opc) {
395 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
396}
397
398static bool isT2i32Load(unsigned Opc) {
399 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
400}
401
402static bool isi32Load(unsigned Opc) {
403 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
404}
405
406static bool isT1i32Store(unsigned Opc) {
407 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
408}
409
410static bool isT2i32Store(unsigned Opc) {
411 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
412}
413
414static bool isi32Store(unsigned Opc) {
415 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
416}
417
418static bool isLoadSingle(unsigned Opc) {
419 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
420}
421
422static unsigned getImmScale(unsigned Opc) {
423 switch (Opc) {
424 default: llvm_unreachable("Unhandled opcode!");
425 case ARM::tLDRi:
426 case ARM::tSTRi:
427 case ARM::tLDRspi:
428 case ARM::tSTRspi:
429 return 1;
430 case ARM::tLDRHi:
431 case ARM::tSTRHi:
432 return 2;
433 case ARM::tLDRBi:
434 case ARM::tSTRBi:
435 return 4;
436 }
437}
438
440 switch (MI->getOpcode()) {
441 default: return 0;
442 case ARM::LDRi12:
443 case ARM::STRi12:
444 case ARM::tLDRi:
445 case ARM::tSTRi:
446 case ARM::tLDRspi:
447 case ARM::tSTRspi:
448 case ARM::t2LDRi8:
449 case ARM::t2LDRi12:
450 case ARM::t2STRi8:
451 case ARM::t2STRi12:
452 case ARM::VLDRS:
453 case ARM::VSTRS:
454 return 4;
455 case ARM::VLDRD:
456 case ARM::VSTRD:
457 return 8;
458 case ARM::LDMIA:
459 case ARM::LDMDA:
460 case ARM::LDMDB:
461 case ARM::LDMIB:
462 case ARM::STMIA:
463 case ARM::STMDA:
464 case ARM::STMDB:
465 case ARM::STMIB:
466 case ARM::tLDMIA:
467 case ARM::tLDMIA_UPD:
468 case ARM::tSTMIA_UPD:
469 case ARM::t2LDMIA:
470 case ARM::t2LDMDB:
471 case ARM::t2STMIA:
472 case ARM::t2STMDB:
473 case ARM::VLDMSIA:
474 case ARM::VSTMSIA:
475 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
476 case ARM::VLDMDIA:
477 case ARM::VSTMDIA:
478 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
479 }
480}
481
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
///
/// After a Thumb1 LDM/STM with writeback, \p Base has been advanced by
/// \p WordOffset words. Every subsequent use of \p Base in this block must
/// either have its immediate offset folded in place, or a compensating
/// predicated SUBS must be inserted to restore the old base value.
// NOTE(review): a `MachineBasicBlock::iterator MBBI` parameter line appears
// to be missing from this copy of the file (the loop below iterates MBBI) —
// confirm against upstream before building.
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        const DebugLoc &DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(*MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      // The SUBS undoes the writeback (WordOffset words = WordOffset * 4
      // bytes) so the remaining uses see the original base value.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
          .add(t1CondCodeOp(true))
          .addReg(Base)
          .addImm(WordOffset * 4)
          .addImm(Pred)
          .addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
        MBBI->definesRegister(Base, /*TRI=*/nullptr))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (!MBB.succ_empty()) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
        .add(t1CondCodeOp(true))
        .addReg(Base)
        .addImm(WordOffset * 4)
        .addImm(Pred)
        .addReg(PredReg);
  }
}
583
584/// Return the first register of class \p RegClass that is not in \p Regs.
585unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
586 if (!RegClassInfoValid) {
587 RegClassInfo.runOnMachineFunction(*MF);
588 RegClassInfoValid = true;
589 }
590
591 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
592 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
593 return Reg;
594 return 0;
595}
596
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
///
/// Maintains the invariant that LiveRegs describes liveness at LiveRegPos;
/// each call only steps that position backwards, which is why queries within
/// a single block must arrive in reverse program order.
// NOTE(review): the second parameter line (`MachineBasicBlock::const_iterator
// Before`) appears to be missing from this copy of the file — confirm
// against upstream before building.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(*TRI);
    LiveRegs.addLiveOuts(MBB);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
615
616static bool ContainsReg(ArrayRef<std::pair<unsigned, bool>> Regs,
617 unsigned Reg) {
618 for (const std::pair<unsigned, bool> &R : Regs)
619 if (R.first == Reg)
620 return true;
621 return false;
622}
623
624/// Create and insert a LDM or STM with Base as base register and registers in
625/// Regs as the register operands that would be loaded / stored. It returns
626/// true if the transformation is done.
627MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
628 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
629 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
630 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
631 ArrayRef<std::pair<unsigned, bool>> Regs,
633 unsigned NumRegs = Regs.size();
634 assert(NumRegs > 1);
635
636 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
637 // Compute liveness information for that register to make the decision.
638 bool SafeToClobberCPSR = !isThumb1 ||
639 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
641
642 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
643
644 // Exception: If the base register is in the input reglist, Thumb1 LDM is
645 // non-writeback.
646 // It's also not possible to merge an STR of the base register in Thumb1.
647 if (isThumb1 && ContainsReg(Regs, Base)) {
648 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
649 if (Opcode == ARM::tLDRi)
650 Writeback = false;
651 else if (Opcode == ARM::tSTRi)
652 return nullptr;
653 }
654
656 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
657 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
658 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
659
660 if (Offset == 4 && haveIBAndDA) {
662 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
664 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
665 // VLDM/VSTM do not support DB mode without also updating the base reg.
667 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
668 // Check if this is a supported opcode before inserting instructions to
669 // calculate a new base register.
670 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
671
672 // If starting offset isn't zero, insert a MI to materialize a new base.
673 // But only do so if it is cost effective, i.e. merging more than two
674 // loads / stores.
675 if (NumRegs <= 2)
676 return nullptr;
677
678 // On Thumb1, it's not worth materializing a new base register without
679 // clobbering the CPSR (i.e. not using ADDS/SUBS).
680 if (!SafeToClobberCPSR)
681 return nullptr;
682
683 unsigned NewBase;
684 if (isi32Load(Opcode)) {
685 // If it is a load, then just use one of the destination registers
686 // as the new base. Will no longer be writeback in Thumb1.
687 NewBase = Regs[NumRegs-1].first;
688 Writeback = false;
689 } else {
690 // Find a free register that we can use as scratch register.
691 moveLiveRegsBefore(MBB, InsertBefore);
692 // The merged instruction does not exist yet but will use several Regs if
693 // it is a Store.
694 if (!isLoadSingle(Opcode))
695 for (const std::pair<unsigned, bool> &R : Regs)
696 LiveRegs.addReg(R.first);
697
698 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
699 if (NewBase == 0)
700 return nullptr;
701 }
702
703 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
704 : ARM::t2ADDri)
705 : (isThumb1 && Base == ARM::SP)
706 ? ARM::tADDrSPi
707 : (isThumb1 && Offset < 8)
708 ? ARM::tADDi3
709 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
710
711 if (Offset < 0) {
712 // FIXME: There are no Thumb1 load/store instructions with negative
713 // offsets. So the Base != ARM::SP might be unnecessary.
714 Offset = -Offset;
715 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
716 : ARM::t2SUBri)
717 : (isThumb1 && Offset < 8 && Base != ARM::SP)
718 ? ARM::tSUBi3
719 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
720 }
721
722 if (!TL->isLegalAddImmediate(Offset))
723 // FIXME: Try add with register operand?
724 return nullptr; // Probably not worth it then.
725
726 // We can only append a kill flag to the add/sub input if the value is not
727 // used in the register list of the stm as well.
728 bool KillOldBase = BaseKill &&
729 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
730
731 if (isThumb1) {
732 // Thumb1: depending on immediate size, use either
733 // ADDS NewBase, Base, #imm3
734 // or
735 // MOV NewBase, Base
736 // ADDS NewBase, #imm8.
737 if (Base != NewBase &&
738 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
739 // Need to insert a MOV to the new base first.
740 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
741 !STI->hasV6Ops()) {
742 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
743 if (Pred != ARMCC::AL)
744 return nullptr;
745 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
746 .addReg(Base, getKillRegState(KillOldBase));
747 } else
748 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
749 .addReg(Base, getKillRegState(KillOldBase))
750 .add(predOps(Pred, PredReg));
751
752 // The following ADDS/SUBS becomes an update.
753 Base = NewBase;
754 KillOldBase = true;
755 }
756 if (BaseOpc == ARM::tADDrSPi) {
757 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
758 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
759 .addReg(Base, getKillRegState(KillOldBase))
760 .addImm(Offset / 4)
761 .add(predOps(Pred, PredReg));
762 } else
763 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
764 .add(t1CondCodeOp(true))
765 .addReg(Base, getKillRegState(KillOldBase))
766 .addImm(Offset)
767 .add(predOps(Pred, PredReg));
768 } else {
769 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
770 .addReg(Base, getKillRegState(KillOldBase))
771 .addImm(Offset)
772 .add(predOps(Pred, PredReg))
773 .add(condCodeOp());
774 }
775 Base = NewBase;
776 BaseKill = true; // New base is always killed straight away.
777 }
778
779 bool isDef = isLoadSingle(Opcode);
780
781 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
782 // base register writeback.
783 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
784 if (!Opcode)
785 return nullptr;
786
787 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
788 // - There is no writeback (LDM of base register),
789 // - the base register is killed by the merged instruction,
790 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
791 // to reset the base register.
792 // Otherwise, don't merge.
793 // It's safe to return here since the code to materialize a new base register
794 // above is also conditional on SafeToClobberCPSR.
795 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
796 return nullptr;
797
798 MachineInstrBuilder MIB;
799
800 if (Writeback) {
801 assert(isThumb1 && "expected Writeback only inThumb1");
802 if (Opcode == ARM::tLDMIA) {
803 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
804 // Update tLDMIA with writeback if necessary.
805 Opcode = ARM::tLDMIA_UPD;
806 }
807
808 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
809
810 // Thumb1: we might need to set base writeback when building the MI.
811 MIB.addReg(Base, getDefRegState(true))
812 .addReg(Base, getKillRegState(BaseKill));
813
814 // The base isn't dead after a merged instruction with writeback.
815 // Insert a sub instruction after the newly formed instruction to reset.
816 if (!BaseKill)
817 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
818 } else {
819 // No writeback, simply build the MachineInstr.
820 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
821 MIB.addReg(Base, getKillRegState(BaseKill));
822 }
823
824 MIB.addImm(Pred).addReg(PredReg);
825
826 for (const std::pair<unsigned, bool> &R : Regs)
827 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
828
829 MIB.cloneMergedMemRefs(Instrs);
830
831 return MIB.getInstr();
832}
833
834MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
835 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
836 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
837 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
838 ArrayRef<std::pair<unsigned, bool>> Regs,
839 ArrayRef<MachineInstr*> Instrs) const {
840 bool IsLoad = isi32Load(Opcode);
841 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
842 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
843
844 assert(Regs.size() == 2);
845 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
846 TII->get(LoadStoreOpcode));
847 if (IsLoad) {
848 MIB.addReg(Regs[0].first, RegState::Define)
849 .addReg(Regs[1].first, RegState::Define);
850 } else {
851 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
852 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
853 }
854 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
855 MIB.cloneMergedMemRefs(Instrs);
856 return MIB.getInstr();
857}
858
859/// Call MergeOps and update MemOps and merges accordingly on success.
860MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
861 const MachineInstr *First = Cand.Instrs.front();
862 unsigned Opcode = First->getOpcode();
863 bool IsLoad = isLoadSingle(Opcode);
865 SmallVector<unsigned, 4> ImpDefs;
866 DenseSet<unsigned> KilledRegs;
867 DenseSet<unsigned> UsedRegs;
868 // Determine list of registers and list of implicit super-register defs.
869 for (const MachineInstr *MI : Cand.Instrs) {
870 const MachineOperand &MO = getLoadStoreRegOp(*MI);
871 Register Reg = MO.getReg();
872 bool IsKill = MO.isKill();
873 if (IsKill)
874 KilledRegs.insert(Reg);
875 Regs.push_back(std::make_pair(Reg, IsKill));
876 UsedRegs.insert(Reg);
877
878 if (IsLoad) {
879 // Collect any implicit defs of super-registers, after merging we can't
880 // be sure anymore that we properly preserved these live ranges and must
881 // removed these implicit operands.
882 for (const MachineOperand &MO : MI->implicit_operands()) {
883 if (!MO.isReg() || !MO.isDef() || MO.isDead())
884 continue;
885 assert(MO.isImplicit());
886 Register DefReg = MO.getReg();
887
888 if (is_contained(ImpDefs, DefReg))
889 continue;
890 // We can ignore cases where the super-reg is read and written.
891 if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
892 continue;
893 ImpDefs.push_back(DefReg);
894 }
895 }
896 }
897
898 // Attempt the merge.
900
901 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
902 iterator InsertBefore = std::next(iterator(LatestMI));
903 MachineBasicBlock &MBB = *LatestMI->getParent();
904 unsigned Offset = getMemoryOpOffset(*First);
906 bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
907 Register PredReg;
908 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
909 DebugLoc DL = First->getDebugLoc();
910 MachineInstr *Merged = nullptr;
911 if (Cand.CanMergeToLSDouble)
912 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
913 Opcode, Pred, PredReg, DL, Regs,
914 Cand.Instrs);
915 if (!Merged && Cand.CanMergeToLSMulti)
916 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
917 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
918 if (!Merged)
919 return nullptr;
920
921 // Determine earliest instruction that will get removed. We then keep an
922 // iterator just above it so the following erases don't invalidated it.
923 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
924 bool EarliestAtBegin = false;
925 if (EarliestI == MBB.begin()) {
926 EarliestAtBegin = true;
927 } else {
928 EarliestI = std::prev(EarliestI);
929 }
930
931 // Remove instructions which have been merged.
932 for (MachineInstr *MI : Cand.Instrs)
933 MBB.erase(MI);
934
935 // Determine range between the earliest removed instruction and the new one.
936 if (EarliestAtBegin)
937 EarliestI = MBB.begin();
938 else
939 EarliestI = std::next(EarliestI);
940 auto FixupRange = make_range(EarliestI, iterator(Merged));
941
942 if (isLoadSingle(Opcode)) {
943 // If the previous loads defined a super-reg, then we have to mark earlier
944 // operands undef; Replicate the super-reg def on the merged instruction.
945 for (MachineInstr &MI : FixupRange) {
946 for (unsigned &ImpDefReg : ImpDefs) {
947 for (MachineOperand &MO : MI.implicit_operands()) {
948 if (!MO.isReg() || MO.getReg() != ImpDefReg)
949 continue;
950 if (MO.readsReg())
951 MO.setIsUndef();
952 else if (MO.isDef())
953 ImpDefReg = 0;
954 }
955 }
956 }
957
958 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
959 for (unsigned ImpDef : ImpDefs)
960 MIB.addReg(ImpDef, RegState::ImplicitDefine);
961 } else {
962 // Remove kill flags: We are possibly storing the values later now.
963 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
964 for (MachineInstr &MI : FixupRange) {
965 for (MachineOperand &MO : MI.uses()) {
966 if (!MO.isReg() || !MO.isKill())
967 continue;
968 if (UsedRegs.count(MO.getReg()))
969 MO.setIsKill(false);
970 }
971 }
972 assert(ImpDefs.empty());
973 }
974
975 return Merged;
976}
977
979 unsigned Value = abs(Offset);
980 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
981 // multiplied by 4.
982 return (Value % 4) == 0 && Value < 1024;
983}
984
985 /// Return true for loads/stores that can be combined to a double/multi
986 /// operation without increasing the requirements for alignment.
// NOTE(review): the first signature line (987) is elided in this rendering;
// only the trailing `const MachineInstr &MI` parameter is visible.
988 const MachineInstr &MI) {
989 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
990 // difference.
991 unsigned Opcode = MI.getOpcode();
992 if (!isi32Load(Opcode) && !isi32Store(Opcode))
993 return true;
994
995 // Stack pointer alignment is out of the programmers control so we can trust
996 // SP-relative loads/stores.
// NOTE(review): the second half of this condition (line 998) is elided in
// this rendering — confirm against the full source before editing.
997 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
999 return true;
1000 return false;
1001}
1002
1003 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
1004 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
1005 const MachineInstr *FirstMI = MemOps[0].MI;
1006 unsigned Opcode = FirstMI->getOpcode();
1007 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
1008 unsigned Size = getLSMultipleTransferSize(FirstMI);
1009
// Greedily grow runs of offset-consecutive mem ops starting at SIndex; each
// run becomes one MergeCandidate (possibly of length 1).
1010 unsigned SIndex = 0;
1011 unsigned EIndex = MemOps.size();
1012 do {
1013 // Look at the first instruction.
1014 const MachineInstr *MI = MemOps[SIndex].MI;
1015 int Offset = MemOps[SIndex].Offset;
1016 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1017 Register PReg = PMO.getReg();
// Undef transfer registers get a sentinel encoding so any following real
// register fails the ascending-order check below.
1018 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1019 : TRI->getEncodingValue(PReg);
1020 unsigned Latest = SIndex;
1021 unsigned Earliest = SIndex;
1022 unsigned Count = 1;
1023 bool CanMergeToLSDouble =
1024 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1025 // ARM errata 602117: LDRD with base in list may result in incorrect base
1026 // register when interrupted or faulted.
1027 if (STI->isCortexM3() && isi32Load(Opcode) &&
1028 PReg == getLoadStoreBaseOp(*MI).getReg())
1029 CanMergeToLSDouble = false;
1030
1031 bool CanMergeToLSMulti = true;
1032 // On swift vldm/vstm starting with an odd register number as that needs
1033 // more uops than single vldrs.
1034 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1035 CanMergeToLSMulti = false;
1036
1037 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1038 // deprecated; LDM to PC is fine but cannot happen here.
1039 if (PReg == ARM::SP || PReg == ARM::PC)
1040 CanMergeToLSMulti = CanMergeToLSDouble = false;
1041
1042 // Should we be conservative?
// NOTE(review): the guard condition (line 1043, presumably the
// AssumeMisalignedLoadStores / mayCombineMisaligned check) is elided in
// this rendering; line 1044 is its body.
1044 CanMergeToLSMulti = CanMergeToLSDouble = false;
1045
1046 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// NOTE(review): only the D variants are capped explicitly here; S variants
// take the UINT_MAX default. A run of more than 32 S registers cannot form
// anyway, since encodings must strictly ascend in the loop below.
1047 unsigned Limit;
1048 switch (Opcode) {
1049 default:
1050 Limit = UINT_MAX;
1051 break;
1052 case ARM::VLDRD:
1053 case ARM::VSTRD:
1054 Limit = 16;
1055 break;
1056 }
1057
1058 // Merge following instructions where possible.
1059 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1060 int NewOffset = MemOps[I].Offset;
1061 if (NewOffset != Offset + (int)Size)
1062 break;
1063 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1064 Register Reg = MO.getReg();
1065 if (Reg == ARM::SP || Reg == ARM::PC)
1066 break;
1067 if (Count == Limit)
1068 break;
1069
1070 // See if the current load/store may be part of a multi load/store.
1071 unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1072 : TRI->getEncodingValue(Reg);
1073 bool PartOfLSMulti = CanMergeToLSMulti;
1074 if (PartOfLSMulti) {
1075 // Register numbers must be in ascending order.
1076 if (RegNum <= PRegNum)
1077 PartOfLSMulti = false;
1078 // For VFP / NEON load/store multiples, the registers must be
1079 // consecutive and within the limit on the number of registers per
1080 // instruction.
1081 else if (!isNotVFP && RegNum != PRegNum+1)
1082 PartOfLSMulti = false;
1083 }
1084 // See if the current load/store may be part of a double load/store.
1085 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1086
1087 if (!PartOfLSMulti && !PartOfLSDouble)
1088 break;
1089 CanMergeToLSMulti &= PartOfLSMulti;
1090 CanMergeToLSDouble &= PartOfLSDouble;
1091 // Track MemOp with latest and earliest position (Positions are
1092 // counted in reverse).
1093 unsigned Position = MemOps[I].Position;
1094 if (Position < MemOps[Latest].Position)
1095 Latest = I;
1096 else if (Position > MemOps[Earliest].Position)
1097 Earliest = I;
1098 // Prepare for next MemOp.
1099 Offset += Size;
1100 PRegNum = RegNum;
1101 }
1102
1103 // Form a candidate from the Ops collected so far.
// Candidates are placement-new'd into a pass-owned allocator; they are not
// individually freed.
1104 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1105 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1106 Candidate->Instrs.push_back(MemOps[C].MI);
1107 Candidate->LatestMIIdx = Latest - SIndex;
1108 Candidate->EarliestMIIdx = Earliest - SIndex;
1109 Candidate->InsertPos = MemOps[Latest].Position;
// Single-instruction candidates are kept (for later base-update folding)
// but cannot merge to a multi/double op.
1110 if (Count == 1)
1111 CanMergeToLSMulti = CanMergeToLSDouble = false;
1112 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1113 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1114 Candidates.push_back(Candidate);
1115 // Continue after the chain.
1116 SIndex += Count;
1117 } while (SIndex < EIndex);
1118}
1119
// Map a non-writeback load/store-multiple opcode to its base-updating (_UPD)
// form for the requested addressing sub-mode. Asserts on unhandled
// opcode/sub-mode combinations.
// NOTE(review): the second signature line (1121, presumably the
// `ARM_AM::AMSubMode Mode` parameter) is elided in this rendering.
1120 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1122 switch (Opc) {
1123 default: llvm_unreachable("Unhandled opcode!");
1124 case ARM::LDMIA:
1125 case ARM::LDMDA:
1126 case ARM::LDMDB:
1127 case ARM::LDMIB:
1128 switch (Mode) {
1129 default: llvm_unreachable("Unhandled submode!");
1130 case ARM_AM::ia: return ARM::LDMIA_UPD;
1131 case ARM_AM::ib: return ARM::LDMIB_UPD;
1132 case ARM_AM::da: return ARM::LDMDA_UPD;
1133 case ARM_AM::db: return ARM::LDMDB_UPD;
1134 }
1135 case ARM::STMIA:
1136 case ARM::STMDA:
1137 case ARM::STMDB:
1138 case ARM::STMIB:
1139 switch (Mode) {
1140 default: llvm_unreachable("Unhandled submode!");
1141 case ARM_AM::ia: return ARM::STMIA_UPD;
1142 case ARM_AM::ib: return ARM::STMIB_UPD;
1143 case ARM_AM::da: return ARM::STMDA_UPD;
1144 case ARM_AM::db: return ARM::STMDB_UPD;
1145 }
// Thumb2 and VFP multiples only exist in ia/db sub-modes.
1146 case ARM::t2LDMIA:
1147 case ARM::t2LDMDB:
1148 switch (Mode) {
1149 default: llvm_unreachable("Unhandled submode!");
1150 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1151 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1152 }
1153 case ARM::t2STMIA:
1154 case ARM::t2STMDB:
1155 switch (Mode) {
1156 default: llvm_unreachable("Unhandled submode!");
1157 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1158 case ARM_AM::db: return ARM::t2STMDB_UPD;
1159 }
1160 case ARM::VLDMSIA:
1161 switch (Mode) {
1162 default: llvm_unreachable("Unhandled submode!");
1163 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1164 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1165 }
1166 case ARM::VLDMDIA:
1167 switch (Mode) {
1168 default: llvm_unreachable("Unhandled submode!");
1169 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1170 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1171 }
1172 case ARM::VSTMSIA:
1173 switch (Mode) {
1174 default: llvm_unreachable("Unhandled submode!");
1175 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1176 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1177 }
1178 case ARM::VSTMDIA:
1179 switch (Mode) {
1180 default: llvm_unreachable("Unhandled submode!");
1181 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1182 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1183 }
1184 }
1185}
1186
1187 /// Check if the given instruction increments or decrements a register and
1188 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1189 /// generated by the instruction are possibly read as well.
// NOTE(review): the first signature line (1190) is elided in this rendering;
// the visible tail is `ARMCC::CondCodes Pred, Register PredReg`.
1191 ARMCC::CondCodes Pred, Register PredReg) {
1192 bool CheckCPSRDef;
// Scale maps the instruction's immediate operand to a signed byte amount
// (negative for subtracts).
1193 int Scale;
1194 switch (MI.getOpcode()) {
1195 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1196 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1197 case ARM::t2SUBri:
1198 case ARM::t2SUBspImm:
1199 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1200 case ARM::t2ADDri:
1201 case ARM::t2ADDspImm:
1202 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1203 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1204 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1205 default: return 0;
1206 }
1207
// Only a plain `Reg = Reg +/- imm` under the identical predicate qualifies.
1208 Register MIPredReg;
1209 if (MI.getOperand(0).getReg() != Reg ||
1210 MI.getOperand(1).getReg() != Reg ||
1211 getInstrPredicate(MI, MIPredReg) != Pred ||
1212 MIPredReg != PredReg)
1213 return 0;
1214
// A flag-setting form cannot be folded away if CPSR might be read later.
1215 if (CheckCPSRDef && definesCPSR(MI))
1216 return 0;
1217 return MI.getOperand(2).getImm() * Scale;
1218}
1219
1220 /// Searches for an increment or decrement of \p Reg before \p MBBI.
// On success returns an iterator to the add/sub instruction and sets
// \p Offset to the signed byte amount; otherwise returns MBB.end() with
// \p Offset == 0.
// NOTE(review): the signature lines (1221-1222) are elided in this rendering.
1223 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
1224 Offset = 0;
1225 MachineBasicBlock &MBB = *MBBI->getParent();
1226 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1227 MachineBasicBlock::iterator EndMBBI = MBB.end();
1228 if (MBBI == BeginMBBI)
1229 return EndMBBI;
1230
1231 // Skip debug values.
1232 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1233 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1234 --PrevMBBI;
1235
1236 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1237 return Offset == 0 ? EndMBBI : PrevMBBI;
1238}
1239
1240 /// Searches for a increment or decrement of \p Reg after \p MBBI.
// On success returns an iterator to the add/sub instruction and sets
// \p Offset to the signed byte amount; otherwise returns MBB.end().
// NOTE(review): the first signature lines (1241-1242) are elided in this
// rendering.
1243 ARMCC::CondCodes Pred, Register PredReg, int &Offset,
1244 const TargetRegisterInfo *TRI) {
1245 Offset = 0;
1246 MachineBasicBlock &MBB = *MBBI->getParent();
1247 MachineBasicBlock::iterator EndMBBI = MBB.end();
1248 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1249 while (NextMBBI != EndMBBI) {
1250 // Skip debug values.
1251 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1252 ++NextMBBI;
1253 if (NextMBBI == EndMBBI)
1254 return EndMBBI;
1255
// Note: the signed result round-trips through `unsigned` here; negative
// decrements survive the conversion back into the int Offset below.
1256 unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1257 if (Off) {
1258 Offset = Off;
1259 return NextMBBI;
1260 }
1261
1262 // SP can only be combined if it is the next instruction after the original
1263 // MBBI, otherwise we may be incrementing the stack pointer (invalidating
1264 // anything below the new pointer) when its frame elements are still in
1265 // use. Other registers can attempt to look further, until a different use
1266 // or def of the register is found.
1267 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1268 NextMBBI->definesRegister(Reg, TRI))
1269 return EndMBBI;
1270
1271 ++NextMBBI;
1272 }
1273 return EndMBBI;
1274}
1275
1276 /// Fold proceeding/trailing inc/dec of base register into the
1277 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1278 ///
1279 /// stmia rn, <ra, rb, rc>
1280 /// rn := rn + 4 * 3;
1281 /// =>
1282 /// stmia rn!, <ra, rb, rc>
1283 ///
1284 /// rn := rn - 4 * 3;
1285 /// ldmia rn, <ra, rb, rc>
1286 /// =>
1287 /// ldmdb rn!, <ra, rb, rc>
1288 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1289 // Thumb1 is already using updating loads/stores.
1290 if (isThumb1) return false;
1291 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1292
1293 const MachineOperand &BaseOP = MI->getOperand(0);
1294 Register Base = BaseOP.getReg();
1295 bool BaseKill = BaseOP.isKill();
1296 Register PredReg;
1297 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1298 unsigned Opcode = MI->getOpcode();
1299 DebugLoc DL = MI->getDebugLoc();
1300
1301 // Can't use an updating ld/st if the base register is also a dest
1302 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1303 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1304 if (MO.getReg() == Base)
1305 return false;
1306
1307 int Bytes = getLSMultipleTransferSize(MI);
1308 MachineBasicBlock &MBB = *MI->getParent();
// NOTE(review): declaration lines 1309/1311/1313 (the MBBI and MergeInstr
// iterators and the addressing sub-mode `Mode`) are elided in this
// rendering; consult the full source before editing.
1310 int Offset;
1312 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
// A matching decrement found *before* the instruction flips the addressing
// sub-mode (ia->db, ib->da) so the update can be folded as writeback.
1314 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1315 Mode = ARM_AM::db;
1316 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1317 Mode = ARM_AM::da;
1318 } else {
1319 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1320 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1321 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1322
1323 // We couldn't find an inc/dec to merge. But if the base is dead, we
1324 // can still change to a writeback form as that will save us 2 bytes
1325 // of code size. It can create WAW hazards though, so only do it if
1326 // we're minimizing code size.
1327 if (!STI->hasMinSize() || !BaseKill)
1328 return false;
1329
1330 bool HighRegsUsed = false;
1331 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1332 if (MO.getReg() >= ARM::R8) {
1333 HighRegsUsed = true;
1334 break;
1335 }
1336
1337 if (!HighRegsUsed)
1338 MergeInstr = MBB.end();
1339 else
1340 return false;
1341 }
1342 }
// MergeInstr == MBB.end() means "no separate inc/dec to remove" (the
// base-dead minsize path above); otherwise erase the folded add/sub.
1343 if (MergeInstr != MBB.end()) {
1344 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1345 MBB.erase(MergeInstr);
1346 }
1347
1348 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1349 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1350 .addReg(Base, getDefRegState(true)) // WB base register
1351 .addReg(Base, getKillRegState(BaseKill))
1352 .addImm(Pred).addReg(PredReg);
1353
1354 // Transfer the rest of operands.
1355 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
1356 MIB.add(MO);
1357
1358 // Transfer memoperands.
1359 MIB.setMemRefs(MI->memoperands());
1360
1361 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1362 MBB.erase(MBBI);
1363 return true;
1364}
1365
// Map a plain load/store opcode to its pre-indexed (writeback) form. VLDR/
// VSTR have no pre-indexed encoding, so a one-register VLDM/VSTM _UPD is
// returned for those (see the comment in MergeBaseUpdateLoadStore).
// NOTE(review): the second signature line (1367, presumably the
// `ARM_AM::AddrOpc Mode` parameter) is elided in this rendering.
1366 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1368 switch (Opc) {
1369 case ARM::LDRi12:
1370 return ARM::LDR_PRE_IMM;
1371 case ARM::STRi12:
1372 return ARM::STR_PRE_IMM;
1373 case ARM::VLDRS:
1374 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1375 case ARM::VLDRD:
1376 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1377 case ARM::VSTRS:
1378 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1379 case ARM::VSTRD:
1380 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1381 case ARM::t2LDRi8:
1382 case ARM::t2LDRi12:
1383 return ARM::t2LDR_PRE;
1384 case ARM::t2STRi8:
1385 case ARM::t2STRi12:
1386 return ARM::t2STR_PRE;
1387 default: llvm_unreachable("Unhandled opcode!");
1388 }
1389}
1390
// Map a plain load/store opcode to its post-indexed (writeback) form.
// VLDR/VSTR again fall back to one-register VLDM/VSTM _UPD; MVE loads and
// stores have dedicated _post forms.
// NOTE(review): the second signature line (1392, presumably the
// `ARM_AM::AddrOpc Mode` parameter) is elided in this rendering.
1391 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1393 switch (Opc) {
1394 case ARM::LDRi12:
1395 return ARM::LDR_POST_IMM;
1396 case ARM::STRi12:
1397 return ARM::STR_POST_IMM;
1398 case ARM::VLDRS:
1399 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1400 case ARM::VLDRD:
1401 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1402 case ARM::VSTRS:
1403 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1404 case ARM::VSTRD:
1405 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1406 case ARM::t2LDRi8:
1407 case ARM::t2LDRi12:
1408 return ARM::t2LDR_POST;
1409 case ARM::t2LDRBi8:
1410 case ARM::t2LDRBi12:
1411 return ARM::t2LDRB_POST;
1412 case ARM::t2LDRSBi8:
1413 case ARM::t2LDRSBi12:
1414 return ARM::t2LDRSB_POST;
1415 case ARM::t2LDRHi8:
1416 case ARM::t2LDRHi12:
1417 return ARM::t2LDRH_POST;
1418 case ARM::t2LDRSHi8:
1419 case ARM::t2LDRSHi12:
1420 return ARM::t2LDRSH_POST;
1421 case ARM::t2STRi8:
1422 case ARM::t2STRi12:
1423 return ARM::t2STR_POST;
1424 case ARM::t2STRBi8:
1425 case ARM::t2STRBi12:
1426 return ARM::t2STRB_POST;
1427 case ARM::t2STRHi8:
1428 case ARM::t2STRHi12:
1429 return ARM::t2STRH_POST;
1430
1431 case ARM::MVE_VLDRBS16:
1432 return ARM::MVE_VLDRBS16_post;
1433 case ARM::MVE_VLDRBS32:
1434 return ARM::MVE_VLDRBS32_post;
1435 case ARM::MVE_VLDRBU16:
1436 return ARM::MVE_VLDRBU16_post;
1437 case ARM::MVE_VLDRBU32:
1438 return ARM::MVE_VLDRBU32_post;
1439 case ARM::MVE_VLDRHS32:
1440 return ARM::MVE_VLDRHS32_post;
1441 case ARM::MVE_VLDRHU32:
1442 return ARM::MVE_VLDRHU32_post;
1443 case ARM::MVE_VLDRBU8:
1444 return ARM::MVE_VLDRBU8_post;
1445 case ARM::MVE_VLDRHU16:
1446 return ARM::MVE_VLDRHU16_post;
1447 case ARM::MVE_VLDRWU32:
1448 return ARM::MVE_VLDRWU32_post;
1449 case ARM::MVE_VSTRB16:
1450 return ARM::MVE_VSTRB16_post;
1451 case ARM::MVE_VSTRB32:
1452 return ARM::MVE_VSTRB32_post;
1453 case ARM::MVE_VSTRH32:
1454 return ARM::MVE_VSTRH32_post;
1455 case ARM::MVE_VSTRBU8:
1456 return ARM::MVE_VSTRBU8_post;
1457 case ARM::MVE_VSTRHU16:
1458 return ARM::MVE_VSTRHU16_post;
1459 case ARM::MVE_VSTRWU32:
1460 return ARM::MVE_VSTRWU32_post;
1461
1462 default: llvm_unreachable("Unhandled opcode!");
1463 }
1464}
1465
1466 /// Fold proceeding/trailing inc/dec of base register into the
1467 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// Returns true (and erases both MI and the folded add/sub) on success.
// NOTE(review): this rendering elides several lines (1474 the `Base`
// declaration, 1496/1498 the MBBI/MergeInstr declarations, 1510/1513 the
// post-indexed NewOpc selection, 1521, and 1555/1585 the AM2 immediate
// computation); consult the full source before editing.
1468 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1469 // Thumb1 doesn't have updating LDR/STR.
1470 // FIXME: Use LDM/STM with single register instead.
1471 if (isThumb1) return false;
1472 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1474
1475 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1476 unsigned Opcode = MI->getOpcode();
1477 DebugLoc DL = MI->getDebugLoc();
1478 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1479 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1480 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can be converted into pre/post-indexed forms.
1481 if (isi32Load(Opcode) || isi32Store(Opcode))
1482 if (MI->getOperand(2).getImm() != 0)
1483 return false;
1484 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1485 return false;
1486
1487 // Can't do the merge if the destination register is the same as the would-be
1488 // writeback register.
1489 if (MI->getOperand(0).getReg() == Base)
1490 return false;
1491
1492 Register PredReg;
1493 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1494 int Bytes = getLSMultipleTransferSize(MI);
1495 MachineBasicBlock &MBB = *MI->getParent();
1497 int Offset;
1499 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1500 unsigned NewOpc;
// A matching inc/dec immediately before the access becomes a pre-indexed
// form; otherwise look after the access for a post-indexed opportunity.
1501 if (!isAM5 && Offset == Bytes) {
1502 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1503 } else if (Offset == -Bytes) {
1504 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1505 } else {
1506 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1507 if (MergeInstr == MBB.end())
1508 return false;
1510
1511 if ((isAM5 && Offset != Bytes) ||
1512 (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
1514 if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
1515 return false;
1516 }
1517 }
1518 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1519 MBB.erase(MergeInstr);
1520
1522
1523 bool isLd = isLoadSingle(Opcode);
1524 if (isAM5) {
1525 // VLDM[SD]_UPD, VSTM[SD]_UPD
1526 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1527 // updating load/store-multiple instructions can be used with only one
1528 // register.)
1529 MachineOperand &MO = MI->getOperand(0);
1530 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1531 .addReg(Base, getDefRegState(true)) // WB base register
1532 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1533 .addImm(Pred)
1534 .addReg(PredReg)
1535 .addReg(MO.getReg(), (isLd ? getDefRegState(true)
1536 : getKillRegState(MO.isKill())))
1537 .cloneMemRefs(*MI);
1538 (void)MIB;
1539 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1540 } else if (isLd) {
1541 if (isAM2) {
1542 // LDR_PRE, LDR_POST
1543 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1544 auto MIB =
1545 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1546 .addReg(Base, RegState::Define)
1547 .addReg(Base)
1548 .addImm(Offset)
1549 .addImm(Pred)
1550 .addReg(PredReg)
1551 .cloneMemRefs(*MI);
1552 (void)MIB;
1553 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1554 } else {
1556 auto MIB =
1557 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1558 .addReg(Base, RegState::Define)
1559 .addReg(Base)
1560 .addReg(0)
1561 .addImm(Imm)
1562 .add(predOps(Pred, PredReg))
1563 .cloneMemRefs(*MI);
1564 (void)MIB;
1565 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1566 }
1567 } else {
1568 // t2LDR_PRE, t2LDR_POST
1569 auto MIB =
1570 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1571 .addReg(Base, RegState::Define)
1572 .addReg(Base)
1573 .addImm(Offset)
1574 .add(predOps(Pred, PredReg))
1575 .cloneMemRefs(*MI);
1576 (void)MIB;
1577 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1578 }
1579 } else {
1580 MachineOperand &MO = MI->getOperand(0);
1581 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1582 // the vestigal zero-reg offset register. When that's fixed, this clause
1583 // can be removed entirely.
1584 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1586 // STR_PRE, STR_POST
1587 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1588 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1589 .addReg(Base)
1590 .addReg(0)
1591 .addImm(Imm)
1592 .add(predOps(Pred, PredReg))
1593 .cloneMemRefs(*MI);
1594 (void)MIB;
1595 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1596 } else {
1597 // t2STR_PRE, t2STR_POST
1598 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1599 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1600 .addReg(Base)
1601 .addImm(Offset)
1602 .add(predOps(Pred, PredReg))
1603 .cloneMemRefs(*MI);
1604 (void)MIB;
1605 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1606 }
1607 }
1608 MBB.erase(MBBI);
1609
1610 return true;
1611}
1612
// Fold a preceding/following inc/dec of the base register into a
// t2LDRDi8/t2STRDi8, producing the pre-/post-indexed t2LDRD/t2STRD form.
// Returns true (and erases both MI and the folded add/sub) on success.
// NOTE(review): lines 1632 and 1635 (the MBBI and MergeInstr declarations)
// are elided in this rendering; consult the full source before editing.
1613 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1614 unsigned Opcode = MI.getOpcode();
1615 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1616 "Must have t2STRDi8 or t2LDRDi8");
// Only the zero-offset form can become a writeback form.
1617 if (MI.getOperand(3).getImm() != 0)
1618 return false;
1619 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1620
1621 // Behaviour for writeback is undefined if base register is the same as one
1622 // of the others.
1623 const MachineOperand &BaseOp = MI.getOperand(2);
1624 Register Base = BaseOp.getReg();
1625 const MachineOperand &Reg0Op = MI.getOperand(0);
1626 const MachineOperand &Reg1Op = MI.getOperand(1);
1627 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1628 return false;
1629
1630 Register PredReg;
1631 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1633 MachineBasicBlock &MBB = *MI.getParent();
1634 int Offset;
1636 PredReg, Offset);
1637 unsigned NewOpc;
// A +/-8 adjustment found before the access (the doubleword's 8 bytes)
// becomes a pre-indexed form; otherwise try a trailing update as
// post-indexed.
1638 if (Offset == 8 || Offset == -8) {
1639 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1640 } else {
1641 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1642 if (MergeInstr == MBB.end())
1643 return false;
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1645 if (!isLegalAddressImm(NewOpc, Offset, TII))
1646 return false;
1647 }
1648 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1649 MBB.erase(MergeInstr);
1650
1651 DebugLoc DL = MI.getDebugLoc();
1652 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the pair then the written-back base; stores define the
// written-back base first.
1653 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1654 MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1655 } else {
1656 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1657 MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1658 }
1659 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1660 .addImm(Offset).addImm(Pred).addReg(PredReg);
1661 assert(TII->get(Opcode).getNumOperands() == 6 &&
1662 TII->get(NewOpc).getNumOperands() == 7 &&
1663 "Unexpected number of operands in Opcode specification.");
1664
1665 // Transfer implicit operands.
1666 for (const MachineOperand &MO : MI.implicit_operands())
1667 MIB.add(MO);
1668 MIB.cloneMemRefs(MI);
1669
1670 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1671 MBB.erase(MBBI);
1672 return true;
1673}
1674
1675/// Returns true if instruction is a memory operation that this pass is capable
1676/// of operating on.
1677static bool isMemoryOp(const MachineInstr &MI) {
1678 unsigned Opcode = MI.getOpcode();
1679 switch (Opcode) {
1680 case ARM::VLDRS:
1681 case ARM::VSTRS:
1682 case ARM::VLDRD:
1683 case ARM::VSTRD:
1684 case ARM::LDRi12:
1685 case ARM::STRi12:
1686 case ARM::tLDRi:
1687 case ARM::tSTRi:
1688 case ARM::tLDRspi:
1689 case ARM::tSTRspi:
1690 case ARM::t2LDRi8:
1691 case ARM::t2LDRi12:
1692 case ARM::t2STRi8:
1693 case ARM::t2STRi12:
1694 break;
1695 default:
1696 return false;
1697 }
1698 if (!MI.getOperand(1).isReg())
1699 return false;
1700
1701 // When no memory operands are present, conservatively assume unaligned,
1702 // volatile, unfoldable.
1703 if (!MI.hasOneMemOperand())
1704 return false;
1705
1706 const MachineMemOperand &MMO = **MI.memoperands_begin();
1707
1708 // Don't touch volatile memory accesses - we may be changing their order.
1709 // TODO: We could allow unordered and monotonic atomics here, but we need to
1710 // make sure the resulting ldm/stm is correctly marked as atomic.
1711 if (MMO.isVolatile() || MMO.isAtomic())
1712 return false;
1713
1714 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1715 // not.
1716 if (MMO.getAlign() < Align(4))
1717 return false;
1718
1719 // str <undef> could probably be eliminated entirely, but for now we just want
1720 // to avoid making a mess of it.
1721 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1722 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1723 return false;
1724
1725 // Likewise don't mess with references to undefined addresses.
1726 if (MI.getOperand(1).isUndef())
1727 return false;
1728
1729 return true;
1730}
1731
// Emit a single load (isDef) or store with opcode NewOpc at MBBI, accessing
// memory at BaseReg+Offset with the given kill/dead/undef and predicate
// state — used by FixInvalidRegPairOp when splitting an illegal LDRD/STRD
// into two word accesses.
// NOTE(review): the first signature lines (1732-1733, the MBB/MBBI/Offset
// parameters) are elided in this rendering.
1734 bool isDef, unsigned NewOpc, unsigned Reg,
1735 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1736 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1737 unsigned PredReg, const TargetInstrInfo *TII,
1738 MachineInstr *MI) {
1739 if (isDef) {
1740 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1741 TII->get(NewOpc))
1742 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1743 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1744 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1745 // FIXME: This is overly conservative; the new instruction accesses 4
1746 // bytes, not 8.
1747 MIB.cloneMemRefs(*MI);
1748 } else {
1749 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1750 TII->get(NewOpc))
1751 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1752 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1753 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1754 // FIXME: This is overly conservative; the new instruction accesses 4
1755 // bytes, not 8.
1756 MIB.cloneMemRefs(*MI);
1757 }
1758}
1759
// Rewrite an LDRD/STRD whose register pair is illegal — errata 602117 on
// Cortex-M3 (base register in the destination pair), or non-consecutive /
// odd-even-misaligned registers for ARM-mode LDRD/STRD — into either an
// equivalent LDM/STM or two single loads/stores. Returns true and erases MI
// if a rewrite happened.
// NOTE(review): the second signature line (1761, presumably
// `MachineBasicBlock::iterator &MBBI) {`) is elided in this rendering.
1760 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1762 MachineInstr *MI = &*MBBI;
1763 unsigned Opcode = MI->getOpcode();
1764 // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1765 // if we see this opcode.
1766 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1767 return false;
1768
1769 const MachineOperand &BaseOp = MI->getOperand(2);
1770 Register BaseReg = BaseOp.getReg();
1771 Register EvenReg = MI->getOperand(0).getReg();
1772 Register OddReg = MI->getOperand(1).getReg();
1773 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1774 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1775
1776 // ARM errata 602117: LDRD with base in list may result in incorrect base
1777 // register when interrupted or faulted.
1778 bool Errata602117 = EvenReg == BaseReg &&
1779 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1780 // ARM LDRD/STRD needs consecutive registers.
1781 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1782 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1783
1784 if (!Errata602117 && !NonConsecutiveRegs)
1785 return false;
1786
1787 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1788 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// Loads carry dead flags on their defs; stores carry kill flags on uses.
1789 bool EvenDeadKill = isLd ?
1790 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1791 bool EvenUndef = MI->getOperand(0).isUndef();
1792 bool OddDeadKill = isLd ?
1793 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1794 bool OddUndef = MI->getOperand(1).isUndef();
1795 bool BaseKill = BaseOp.isKill();
1796 bool BaseUndef = BaseOp.isUndef();
1797 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1798 "register offset not handled below");
1799 int OffImm = getMemoryOpOffset(*MI);
1800 Register PredReg;
1801 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1802
1803 if (OddRegNum > EvenRegNum && OffImm == 0) {
1804 // Ascending register numbers and no offset. It's safe to change it to a
1805 // ldm or stm.
1806 unsigned NewOpc = (isLd)
1807 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1808 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1809 if (isLd) {
1810 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1811 .addReg(BaseReg, getKillRegState(BaseKill))
1812 .addImm(Pred).addReg(PredReg)
1813 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1814 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1815 .cloneMemRefs(*MI);
1816 ++NumLDRD2LDM;
1817 } else {
1818 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1819 .addReg(BaseReg, getKillRegState(BaseKill))
1820 .addImm(Pred).addReg(PredReg)
1821 .addReg(EvenReg,
1822 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1823 .addReg(OddReg,
1824 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1825 .cloneMemRefs(*MI);
1826 ++NumSTRD2STM;
1827 }
1828 } else {
1829 // Split into two instructions.
1830 unsigned NewOpc = (isLd)
1831 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1832 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1833 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1834 // so adjust and use t2LDRi12 here for that.
1835 unsigned NewOpc2 = (isLd)
1836 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1837 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1838 // If this is a load, make sure the first load does not clobber the base
1839 // register before the second load reads it.
1840 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1841 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd-register load first so the base is still intact.
1842 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1843 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1844 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1845 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1846 MI);
1847 } else {
1848 if (OddReg == EvenReg && EvenDeadKill) {
1849 // If the two source operands are the same, the kill marker is
1850 // probably on the first one. e.g.
1851 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1852 EvenDeadKill = false;
1853 OddDeadKill = true;
1854 }
1855 // Never kill the base register in the first instruction.
1856 if (EvenReg == BaseReg)
1857 EvenDeadKill = false;
1858 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1859 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1860 MI);
1861 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1862 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1863 MI);
1864 }
1865 if (isLd)
1866 ++NumLDRD2LDR;
1867 else
1868 ++NumSTRD2STR;
1869 }
1870
// Erase the original LDRD/STRD and leave MBBI pointing past it.
1871 MBBI = MBB.erase(MBBI);
1872 return true;
1873}
1874
1875/// An optimization pass to turn multiple LDR / STR ops of the same base and
1876/// incrementing offset into LDM / STM ops.
1877bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1878  MemOpQueue MemOps;
1879  unsigned CurrBase = 0;
1880  unsigned CurrOpc = ~0u;
1881  ARMCC::CondCodes CurrPred = ARMCC::AL;
1882  unsigned Position = 0;
1883  assert(Candidates.size() == 0);
1884  assert(MergeBaseCandidates.size() == 0);
  // Cached live-register state is per-block; force it to be recomputed.
1885  LiveRegsValid = false;
1886
1888       I = MBBI) {
1889    // The instruction in front of the iterator is the one we look at.
1890    MBBI = std::prev(I);
1891    if (FixInvalidRegPairOp(MBB, MBBI))
1892      continue;
1893    ++Position;
1894
1895    if (isMemoryOp(*MBBI)) {
1896      unsigned Opcode = MBBI->getOpcode();
1897      const MachineOperand &MO = MBBI->getOperand(0);
1898      Register Reg = MO.getReg();
1900      Register PredReg;
1901      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1903      if (CurrBase == 0) {
1904        // Start of a new chain.
1905        CurrBase = Base;
1906        CurrOpc = Opcode;
1907        CurrPred = Pred;
1908        MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1909        continue;
1910      }
1911      // Note: No need to match PredReg in the next if.
1912      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1913        // Watch out for:
1914        //   r4 := ldr [r0, #8]
1915        //   r4 := ldr [r0, #4]
1916        // or
1917        //   r0 := ldr [r0]
1918        // If a load overrides the base register or a register loaded by
1919        // another load in our chain, we cannot take this instruction.
1920        bool Overlap = false;
1921        if (isLoadSingle(Opcode)) {
1922          Overlap = (Base == Reg);
1923          if (!Overlap) {
1924            for (const MemOpQueueEntry &E : MemOps) {
1925              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1926                Overlap = true;
1927                break;
1928              }
1929            }
1930          }
1931        }
1932
1933        if (!Overlap) {
1934          // Check offset and sort memory operation into the current chain.
1935          if (Offset > MemOps.back().Offset) {
1936            MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1937            continue;
1938          } else {
1939            MemOpQueue::iterator MI, ME;
1940            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1941              if (Offset < MI->Offset) {
1942                // Found a place to insert.
1943                break;
1944              }
1945              if (Offset == MI->Offset) {
1946                // Collision, abort.
1947                MI = ME;
1948                break;
1949              }
1950            }
1951            if (MI != MemOps.end()) {
1952              MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1953              continue;
1954            }
1955          }
1956        }
1957      }
1958
1959      // Don't advance the iterator; The op will start a new chain next.
1960      MBBI = I;
1961      --Position;
1962      // Fallthrough to look into existing chain.
1963    } else if (MBBI->isDebugInstr()) {
1964      continue;
1965    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1966               MBBI->getOpcode() == ARM::t2STRDi8) {
1967      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1968      // remember them because we may still be able to merge add/sub into them.
1969      MergeBaseCandidates.push_back(&*MBBI);
1970    }
1971
1972    // If we are here then the chain is broken; Extract candidates for a merge.
1973    if (MemOps.size() > 0) {
1974      FormCandidates(MemOps);
1975      // Reset for the next chain.
1976      CurrBase = 0;
1977      CurrOpc = ~0u;
1978      CurrPred = ARMCC::AL;
1979      MemOps.clear();
1980    }
1981  }
  // Flush any chain still open at the end of the block.
1982  if (MemOps.size() > 0)
1983    FormCandidates(MemOps);
1984
1985  // Sort candidates so they get processed from end to begin of the basic
1986  // block later; This is necessary for liveness calculation.
1987  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1988    return M0->InsertPos < M1->InsertPos;
1989  };
1990  llvm::sort(Candidates, LessThan);
1991
1992  // Go through list of candidates and merge.
1993  bool Changed = false;
1994  for (const MergeCandidate *Candidate : Candidates) {
1995    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1996      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1997      // Merge preceding/trailing base inc/dec into the merged op.
1998      if (Merged) {
1999        Changed = true;
2000        unsigned Opcode = Merged->getOpcode();
2001        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2002          MergeBaseUpdateLSDouble(*Merged);
2003        else
2004          MergeBaseUpdateLSMultiple(Merged);
2005      } else {
        // Merging failed; still try to fold base updates into each
        // individual load/store.
2006        for (MachineInstr *MI : Candidate->Instrs) {
2007          if (MergeBaseUpdateLoadStore(MI))
2008            Changed = true;
2009        }
2010      }
2011    } else {
2012      assert(Candidate->Instrs.size() == 1);
2013      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2014        Changed = true;
2015    }
2016  }
2017  Candidates.clear();
2018  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2019  for (MachineInstr *MI : MergeBaseCandidates)
2020    MergeBaseUpdateLSDouble(*MI);
2021  MergeBaseCandidates.clear();
2022
2023  return Changed;
2024}
2025
2026/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2027/// into the preceding stack restore so it directly restore the value of LR
2028/// into pc.
2029///   ldmfd sp!, {..., lr}
2030///   bx lr
2031/// or
2032///   ldmfd sp!, {..., lr}
2033///   mov pc, lr
2034/// =>
2035///   ldmfd sp!, {..., pc}
2036bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2037  // Thumb1 LDM doesn't allow high registers.
2038  if (isThumb1) return false;
2039  if (MBB.empty()) return false;
2040
  // Only plain return instructions (bx lr / mov pc, lr) are handled.
2042  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2043      (MBBI->getOpcode() == ARM::BX_RET ||
2044       MBBI->getOpcode() == ARM::tBX_RET ||
2045       MBBI->getOpcode() == ARM::MOVPCLR)) {
2046    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2047    // Ignore any debug instructions.
2048    while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2049      --PrevI;
2050    MachineInstr &PrevMI = *PrevI;
2051    unsigned Opcode = PrevMI.getOpcode();
2052    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2053        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2054        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR, if popped, is the last register operand of the load-multiple.
2055      MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2056      if (MO.getReg() != ARM::LR)
2057        return false;
2058      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2059      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2060              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the LDM to the *_RET form loading straight into PC, then
      // delete the now-redundant return instruction.
2061      PrevMI.setDesc(TII->get(NewOpc));
2062      MO.setReg(ARM::PC);
2063      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2064      MBB.erase(MBBI);
2065      return true;
2066    }
2067  }
2068  return false;
2069}
2070
/// Fold "tMOVr LR, rN<kill>; tBX_RET" into a single "tBX rN".
/// Requires v4T (asserted below, since tBX is a v4T instruction).
2071bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2073  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2074      MBBI->getOpcode() != ARM::tBX_RET)
2075    return false;
2076
2078  --Prev;
2079  if (Prev->getOpcode() != ARM::tMOVr ||
2080      !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2081    return false;
2082
  // Find the (killed) source register of the tMOVr that feeds LR and
  // branch through it directly.
2083  for (auto Use : Prev->uses())
2084    if (Use.isKill()) {
2085      assert(STI->hasV4TOps());
2086      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2087          .addReg(Use.getReg(), RegState::Kill)
2090      MBB.erase(MBBI);
2091      MBB.erase(Prev);
2092      return true;
2093    }
2094
2095  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2096}
2097
2098bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2099  if (skipFunction(Fn.getFunction()))
2100    return false;
2101
  // Cache the per-function target objects used by the transforms below.
2102  MF = &Fn;
2103  STI = &Fn.getSubtarget<ARMSubtarget>();
2104  TL = STI->getTargetLowering();
2105  AFI = Fn.getInfo<ARMFunctionInfo>();
2106  TII = STI->getInstrInfo();
2107  TRI = STI->getRegisterInfo();
2108
  // Register-class info is recomputed lazily for this function.
2109  RegClassInfoValid = false;
2110  isThumb2 = AFI->isThumb2Function();
2111  isThumb1 = AFI->isThumbFunction() && !isThumb2;
2112
2113  bool Modified = false, ModifiedLDMReturn = false;
2114  for (MachineBasicBlock &MBB : Fn) {
2115    Modified |= LoadStoreMultipleOpti(MBB);
    // Return folding is gated on v5T and must not break return-address
    // signing, which needs LR restored into a register, not PC.
2116    if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2117      ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2118    if (isThumb1)
2119      Modified |= CombineMovBx(MBB);
2120  }
2121  Modified |= ModifiedLDMReturn;
2122
2123  // If we merged a BX instruction into an LDM, we need to re-calculate whether
2124  // LR is restored. This check needs to consider the whole function, not just
2125  // the instruction(s) we changed, because there may be other BX returns which
2126  // still need LR to be restored.
2127  if (ModifiedLDMReturn)
2129
2130  Allocator.DestroyAll();
2131  return Modified;
2132}
2133
2134#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2135  "ARM pre- register allocation load / store optimization pass"
2136
2137namespace {
2138
2139/// Pre- register allocation pass that move load / stores from consecutive
2140/// locations close to make it more likely they will be combined later.
2141struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  // Pass identification.
2142  static char ID;
2143
  // Cached per-function pointers, initialized in runOnMachineFunction().
2145  const DataLayout *TD;
2146  const TargetInstrInfo *TII;
2147  const TargetRegisterInfo *TRI;
2148  const ARMSubtarget *STI;
2151  MachineFunction *MF;
2152
2153  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
2154
2155  bool runOnMachineFunction(MachineFunction &Fn) override;
2156
2157  StringRef getPassName() const override {
2159  }
2160
2161  void getAnalysisUsage(AnalysisUsage &AU) const override {
2166  }
2167
2168private:
  // Check whether Op0/Op1 can be combined into one LDRD/STRD; on success
  // fills in the new opcode, registers, offset and predicate.
2169  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2170                        unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2171                        Register &BaseReg, int &Offset, Register &PredReg,
2172                        ARMCC::CondCodes &Pred, bool &isT2);
  // Move loads/stores with the same base register next to each other.
2173  bool RescheduleOps(
2175      unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2177  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  // Distribute base-register increments into postinc addressing modes.
2178  bool DistributeIncrements();
2179  bool DistributeIncrements(Register Base);
2180};
2181
2182} // end anonymous namespace
2183
// Static pass ID; its address identifies the pass to the registry.
2184char ARMPreAllocLoadStoreOpt::ID = 0;
2185
// Register the pass (and its analysis dependencies) with the pass registry.
2186INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2189INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2191
2192// Limit the number of instructions to be rescheduled.
2193// FIXME: tune this limit, and/or come up with some better heuristics.
2194static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2195                                          cl::init(8), cl::Hidden);
2196
2197bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2198 if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2199 return false;
2200
2201 TD = &Fn.getDataLayout();
2202 STI = &Fn.getSubtarget<ARMSubtarget>();
2203 TII = STI->getInstrInfo();
2204 TRI = STI->getRegisterInfo();
2205 MRI = &Fn.getRegInfo();
2206 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2207 MF = &Fn;
2208 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2209
2210 bool Modified = DistributeIncrements();
2211 for (MachineBasicBlock &MFI : Fn)
2212 Modified |= RescheduleLoadStoreInstrs(&MFI);
2213
2214 return Modified;
2215}
2216
2217static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2221                                      SmallSet<unsigned, 4> &MemRegs,
2222                                      const TargetRegisterInfo *TRI,
2223                                      AliasAnalysis *AA) {
2224  // Are there stores / loads / calls between them?
2225  SmallSet<unsigned, 4> AddedRegPressure;
  // Scan the instructions strictly between the first and last memory op.
2226  while (++I != E) {
2227    if (I->isDebugInstr() || MemOps.count(&*I))
2228      continue;
2229    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2230      return false;
    // Stores (and, for stores being moved, loads) must not alias any of the
    // memory ops we intend to move across this instruction.
2231    if (I->mayStore() || (!isLd && I->mayLoad()))
2232      for (MachineInstr *MemOp : MemOps)
2233        if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2234          return false;
    // A def of the base register blocks the move; other registers touched in
    // the window only count toward the register-pressure estimate.
2235    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2236      MachineOperand &MO = I->getOperand(j);
2237      if (!MO.isReg())
2238        continue;
2239      Register Reg = MO.getReg();
2240      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2241        return false;
2242      if (Reg != Base && !MemRegs.count(Reg))
2243        AddedRegPressure.insert(Reg);
2244    }
2245  }
2246
2247  // Estimate register pressure increase due to the transformation.
2248  if (MemRegs.size() <= 4)
2249    // Ok if we are moving small number of instructions.
2250    return true;
2251  return AddedRegPressure.size() <= MemRegs.size() * 2;
2252}
2253
2254bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2255    MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2256    Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2257    Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2258  // Make sure we're allowed to generate LDRD/STRD.
2259  if (!STI->hasV5TEOps())
2260    return false;
2261
2262  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Map the single-word opcode to its doubleword counterpart.
2263  unsigned Scale = 1;
2264  unsigned Opcode = Op0->getOpcode();
2265  if (Opcode == ARM::LDRi12) {
2266    NewOpc = ARM::LDRD;
2267  } else if (Opcode == ARM::STRi12) {
2268    NewOpc = ARM::STRD;
2269  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2270    NewOpc = ARM::t2LDRDi8;
2271    Scale = 4;
2272    isT2 = true;
2273  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2274    NewOpc = ARM::t2STRDi8;
2275    Scale = 4;
2276    isT2 = true;
2277  } else {
2278    return false;
2279  }
2280
2281  // Make sure the base address satisfies i64 ld / st alignment requirement.
2282  // At the moment, we ignore the memoryoperand's value.
2283  // If we want to use AliasAnalysis, we should check it accordingly.
2284  if (!Op0->hasOneMemOperand() ||
2285      (*Op0->memoperands_begin())->isVolatile() ||
2286      (*Op0->memoperands_begin())->isAtomic())
2287    return false;
2288
2289  Align Alignment = (*Op0->memoperands_begin())->getAlign();
2290  Align ReqAlign = STI->getDualLoadStoreAlignment();
2291  if (Alignment < ReqAlign)
2292    return false;
2293
2294  // Then make sure the immediate offset fits.
2295  int OffImm = getMemoryOpOffset(*Op0);
2296  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a scaled 8-bit immediate.
2297    int Limit = (1 << 8) * Scale;
2298    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2299      return false;
2300    Offset = OffImm;
2301  } else {
    // ARM-mode LDRD/STRD use addrmode3: unsigned magnitude plus add/sub bit.
2303    if (OffImm < 0) {
2305      OffImm = - OffImm;
2306    }
2307    int Limit = (1 << 8) * Scale;
2308    if (OffImm >= Limit || (OffImm & (Scale-1)))
2309      return false;
2310    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2311  }
  // LDRD/STRD need two distinct transfer registers.
2312  FirstReg = Op0->getOperand(0).getReg();
2313  SecondReg = Op1->getOperand(0).getReg();
2314  if (FirstReg == SecondReg)
2315    return false;
2316  BaseReg = Op0->getOperand(1).getReg();
2317  Pred = getInstrPredicate(*Op0, PredReg);
2318  dl = Op0->getDebugLoc();
2319  return true;
2320}
2321
2322bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2323    MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2324    bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2325    SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2326  bool RetVal = false;
2327
2328  // Sort by offset (in reverse order).
2329  llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2330    int LOffset = getMemoryOpOffset(*LHS);
2331    int ROffset = getMemoryOpOffset(*RHS);
2332    assert(LHS == RHS || LOffset != ROffset);
2333    return LOffset > ROffset;
2334  });
2335
2336  // The loads / stores of the same base are in order. Scan them from first to
2337  // last and check for the following:
2338  // 1. Any def of base.
2339  // 2. Any gaps.
2340  while (Ops.size() > 1) {
2341    unsigned FirstLoc = ~0U;
2342    unsigned LastLoc = 0;
2343    MachineInstr *FirstOp = nullptr;
2344    MachineInstr *LastOp = nullptr;
2345    int LastOffset = 0;
2346    unsigned LastOpcode = 0;
2347    unsigned LastBytes = 0;
2348    unsigned NumMove = 0;
2349    for (MachineInstr *Op : llvm::reverse(Ops)) {
2350      // Make sure each operation has the same kind.
2351      unsigned LSMOpcode
2352        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2353      if (LastOpcode && LSMOpcode != LastOpcode)
2354        break;
2355
2356      // Check that we have a continuous set of offsets.
2357      int Offset = getMemoryOpOffset(*Op);
2358      unsigned Bytes = getLSMultipleTransferSize(Op);
2359      if (LastBytes) {
2360        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2361          break;
2362      }
2363
2364      // Don't try to reschedule too many instructions.
2365      if (NumMove == InstReorderLimit)
2366        break;
2367
2368      // Found a mergable instruction; save information about it.
2369      ++NumMove;
2370      LastOffset = Offset;
2371      LastBytes = Bytes;
2372      LastOpcode = LSMOpcode;
2373
      // Track the earliest and latest mergable instruction positions.
2374      unsigned Loc = MI2LocMap[Op];
2375      if (Loc <= FirstLoc) {
2376        FirstLoc = Loc;
2377        FirstOp = Op;
2378      }
2379      if (Loc >= LastLoc) {
2380        LastLoc = Loc;
2381        LastOp = Op;
2382      }
2383    }
2384
    // A run of one cannot be merged; drop it and retry with the rest.
2385    if (NumMove <= 1)
2386      Ops.pop_back();
2387    else {
2388      SmallPtrSet<MachineInstr*, 4> MemOps;
2389      SmallSet<unsigned, 4> MemRegs;
2390      for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2391        MemOps.insert(Ops[i]);
2392        MemRegs.insert(Ops[i]->getOperand(0).getReg());
2393      }
2394
2395      // Be conservative, if the instructions are too far apart, don't
2396      // move them. We want to limit the increase of register pressure.
2397      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2398      if (DoMove)
2399        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2400                                           MemOps, MemRegs, TRI, AA);
2401      if (!DoMove) {
2402        for (unsigned i = 0; i != NumMove; ++i)
2403          Ops.pop_back();
2404      } else {
2405        // This is the new location for the loads / stores.
2406        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2407        while (InsertPos != MBB->end() &&
2408               (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2409          ++InsertPos;
2410
2411        // If we are moving a pair of loads / stores, see if it makes sense
2412        // to try to allocate a pair of registers that can form register pairs.
2413        MachineInstr *Op0 = Ops.back();
2414        MachineInstr *Op1 = Ops[Ops.size()-2];
2415        Register FirstReg, SecondReg;
2416        Register BaseReg, PredReg;
2418        bool isT2 = false;
2419        unsigned NewOpc = 0;
2420        int Offset = 0;
2421        DebugLoc dl;
2422        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2423                                             FirstReg, SecondReg, BaseReg,
2424                                             Offset, PredReg, Pred, isT2)) {
2425          Ops.pop_back();
2426          Ops.pop_back();
2427
2428          const MCInstrDesc &MCID = TII->get(NewOpc);
2429          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
2430          MRI->constrainRegClass(FirstReg, TRC);
2431          MRI->constrainRegClass(SecondReg, TRC);
2432
2433          // Form the pair instruction.
2434          if (isLd) {
2435            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2436                                          .addReg(FirstReg, RegState::Define)
2437                                          .addReg(SecondReg, RegState::Define)
2438                                          .addReg(BaseReg);
2439            // FIXME: We're converting from LDRi12 to an insn that still
2440            // uses addrmode2, so we need an explicit offset reg. It should
2441            // always by reg0 since we're transforming LDRi12s.
2442            if (!isT2)
2443              MIB.addReg(0);
2444            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2445            MIB.cloneMergedMemRefs({Op0, Op1});
2446            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2447            ++NumLDRDFormed;
2448          } else {
2449            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2450                                          .addReg(FirstReg)
2451                                          .addReg(SecondReg)
2452                                          .addReg(BaseReg);
2453            // FIXME: We're converting from LDRi12 to an insn that still
2454            // uses addrmode2, so we need an explicit offset reg. It should
2455            // always by reg0 since we're transforming STRi12s.
2456            if (!isT2)
2457              MIB.addReg(0);
2458            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2459            MIB.cloneMergedMemRefs({Op0, Op1});
2460            LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2461            ++NumSTRDFormed;
2462          }
2463          MBB->erase(Op0);
2464          MBB->erase(Op1);
2465
2466          if (!isT2) {
2467            // Add register allocation hints to form register pairs.
2468            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2469            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2470          }
2471        } else {
2472          for (unsigned i = 0; i != NumMove; ++i) {
2473            MachineInstr *Op = Ops.pop_back_val();
2474            if (isLd) {
2475              // Populate RegisterMap with all Registers defined by loads.
2476              Register Reg = Op->getOperand(0).getReg();
2477              RegisterMap[Reg];
2478            }
2479
2480            MBB->splice(InsertPos, MBB, Op);
2481          }
2482        }
2483
2484        NumLdStMoved += NumMove;
2485        RetVal = true;
2486      }
2487    }
2488  }
2489
2490  return RetVal;
2491}
2492
// Invoke Fn on every register operand of a debug instruction: operand 0 for
// a plain DBG_VALUE, operands 2..N for a DBG_VALUE_LIST.
2494                                 std::function<void(MachineOperand &)> Fn) {
2495  if (MI->isNonListDebugValue()) {
2496    auto &Op = MI->getOperand(0);
2497    if (Op.isReg())
2498      Fn(Op);
2499  } else {
    // DBG_VALUE_LIST: operands 0/1 are the variable and expression; the
    // location operands start at index 2.
2500    for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2501      auto &Op = MI->getOperand(I);
2502      if (Op.isReg())
2503        Fn(Op);
2504    }
2505  }
2506}
2507
2508// Update the RegisterMap with the instruction that was moved because a
2509// DBG_VALUE_LIST may need to be moved again.
2512    MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2513
  // Re-point every RegisterMap entry that referenced the old instruction to
  // the moved DBG_VALUE_LIST instead.
2514  forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2515    auto RegIt = RegisterMap.find(Op.getReg());
2516    if (RegIt == RegisterMap.end())
2517      return;
2518    auto &InstrVec = RegIt->getSecond();
2519    llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2520  });
2521}
2522
// Build a DebugVariable key (variable, expression, inlined-at location)
// from a debug instruction.
2524  auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2525                              MI->getDebugLoc()->getInlinedAt());
2526  return DbgVar;
2527}
2528
2529bool
2530ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2531  bool RetVal = false;
2532
2533  DenseMap<MachineInstr *, unsigned> MI2LocMap;
2534  using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2535  using BaseVec = SmallVector<unsigned, 4>;
2536  Base2InstMap Base2LdsMap;
2537  Base2InstMap Base2StsMap;
2538  BaseVec LdBases;
2539  BaseVec StBases;
2540  // This map is used to track the relationship between the virtual
2541  // register that is the result of a load that is moved and the DBG_VALUE
2542  // MachineInstr pointer that uses that virtual register.
2543  SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2544
  // Monotonic location counter over non-debug instructions (see MI2LocMap).
2545  unsigned Loc = 0;
2548  while (MBBI != E) {
2549    for (; MBBI != E; ++MBBI) {
2550      MachineInstr &MI = *MBBI;
2551      if (MI.isCall() || MI.isTerminator()) {
2552        // Stop at barriers.
2553        ++MBBI;
2554        break;
2555      }
2556
2557      if (!MI.isDebugInstr())
2558        MI2LocMap[&MI] = ++Loc;
2559
2560      if (!isMemoryOp(MI))
2561        continue;
2562      Register PredReg;
2563      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2564        continue;
2565
2566      int Opc = MI.getOpcode();
2567      bool isLd = isLoadSingle(Opc);
2568      Register Base = MI.getOperand(1).getReg();
2570      bool StopHere = false;
      // Record MI under its base register; stop the run if the same
      // base+offset combination was already seen.
2571      auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2572        auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2573        if (Inserted) {
2574          BI->second.push_back(&MI);
2575          Bases.push_back(Base);
2576          return;
2577        }
2578        for (const MachineInstr *MI : BI->second) {
2579          if (Offset == getMemoryOpOffset(*MI)) {
2580            StopHere = true;
2581            break;
2582          }
2583        }
2584        if (!StopHere)
2585          BI->second.push_back(&MI);
2586      };
2587
2588      if (isLd)
2589        FindBases(Base2LdsMap, LdBases);
2590      else
2591        FindBases(Base2StsMap, StBases);
2592
2593      if (StopHere) {
2594        // Found a duplicate (a base+offset combination that's seen earlier).
2595        // Backtrack.
2596        --Loc;
2597        break;
2598      }
2599    }
2600
2601    // Re-schedule loads.
2602    for (unsigned Base : LdBases) {
2603      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2604      if (Lds.size() > 1)
2605        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2606    }
2607
2608    // Re-schedule stores.
2609    for (unsigned Base : StBases) {
2610      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2611      if (Sts.size() > 1)
2612        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2613    }
2614
    // If we stopped at a barrier, reset the per-run state before scanning
    // the next region of the block.
2615    if (MBBI != E) {
2616      Base2LdsMap.clear();
2617      Base2StsMap.clear();
2618      LdBases.clear();
2619      StBases.clear();
2620    }
2621  }
2622
2623  // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2624  // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2625  // use-before-def, resulting in a loss of debug info.
2626
2627  // Example:
2628  // Before the Pre Register Allocation Load Store Pass
2629  // inst_a
2630  // %2 = ld ...
2631  // inst_b
2632  // DBG_VALUE %2, "x", ...
2633  // %3 = ld ...
2634
2635  // After the Pass:
2636  // inst_a
2637  // inst_b
2638  // DBG_VALUE %2, "x", ...
2639  // %2 = ld ...
2640  // %3 = ld ...
2641
2642  // The code below addresses this by moving the DBG_VALUE to the position
2643  // immediately after the load.
2644
2645  // Example:
2646  // After the code below:
2647  // inst_a
2648  // inst_b
2649  // %2 = ld ...
2650  // DBG_VALUE %2, "x", ...
2651  // %3 = ld ...
2652
2653  // The algorithm works in two phases: First RescheduleOps() populates the
2654  // RegisterMap with registers that were moved as keys, there is no value
2655  // inserted. In the next phase, every MachineInstr in a basic block is
2656  // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2657  // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2658  // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2659  // describes debug information for a variable that already exists in the
2660  // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2661  // be set to undef. If the current MachineInstr is a load that was moved,
2662  // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2663  // the load.
2664
2665  // To illustrate the above algorithm visually let's take this example.
2666
2667  // Before the Pre Register Allocation Load Store Pass:
2668  // %2 = ld ...
2669  // DBG_VALUE %2, A, .... # X
2670  // DBG_VALUE 0, A, ... # Y
2671  // %3 = ld ...
2672  // DBG_VALUE %3, A, ..., # Z
2673  // %4 = ld ...
2674
2675  // After Pre Register Allocation Load Store Pass:
2676  // DBG_VALUE %2, A, .... # X
2677  // DBG_VALUE 0, A, ... # Y
2678  // DBG_VALUE %3, A, ..., # Z
2679  // %2 = ld ...
2680  // %3 = ld ...
2681  // %4 = ld ...

2682
2683  // The algorithm below does the following:
2684
2685  // In the beginning, the RegisterMap will have been populated with the virtual
2686  // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2687  // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2688  // InstrMap {}
2689  // -> DBG_VALUE %2, A, .... # X
2690  // DBG_VALUE 0, A, ... # Y
2691  // DBG_VALUE %3, A, ..., # Z
2692  // %2 = ld ...
2693  // %3 = ld ...
2694  // %4 = ld ...

2695
2696  // After the first DBG_VALUE (denoted with an X) is processed, the
2697  // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2698  // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2699  // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2700  // DBG_VALUE %2, A, .... # X
2701  // -> DBG_VALUE 0, A, ... # Y
2702  // DBG_VALUE %3, A, ..., # Z
2703  // %2 = ld ...
2704  // %3 = ld ...
2705  // %4 = ld ...

2706
2707  // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2708  // to now hold Y for A and the RegisterMap is also updated to remove X from
2709  // %2, this is because both X and Y describe the same debug variable A. X is
2710  // also updated to have a $noreg as the first operand.
2711  // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2712  // InstrMap = {X-> 2}
2713  // DBG_VALUE $noreg, A, .... # X
2714  // DBG_VALUE 0, A, ... # Y
2715  // -> DBG_VALUE %3, A, ..., # Z
2716  // %2 = ld ...
2717  // %3 = ld ...
2718  // %4 = ld ...

2719
2720  // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2721  // hold Z fr A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2722  // is updated to have Z mapped to %3. This is again because Z describes the
2723  // debug variable A, Y is not updated to have $noreg as first operand because
2724  // its first operand is an immediate, not a register.
2725  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2726  // InstrMap = {X -> 2, Z -> 3}
2727  // DBG_VALUE $noreg, A, .... # X
2728  // DBG_VALUE 0, A, ... # Y
2729  // DBG_VALUE %3, A, ..., # Z
2730  // -> %2 = ld ...
2731  // %3 = ld ...
2732  // %4 = ld ...

2733
2734  // Nothing happens here since the RegisterMap for %2 contains no value.
2735  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2736  // InstrMap = {X -> 2, Z -> 3}
2737  // DBG_VALUE $noreg, A, .... # X
2738  // DBG_VALUE 0, A, ... # Y
2739  // DBG_VALUE %3, A, ..., # Z
2740  // %2 = ld ...
2741  // -> %3 = ld ...
2742  // %4 = ld ...

2743
2744  // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2745  // pointer Z is copied to come after the load for %3 and the old Z's first
2746  // operand is changed to $noreg the Basic Block iterator is moved to after the
2747  // DBG_VALUE Z's new position.
2748  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2749  // InstrMap = {X -> 2, Z -> 3}
2750  // DBG_VALUE $noreg, A, .... # X
2751  // DBG_VALUE 0, A, ... # Y
2752  // DBG_VALUE $noreg, A, ..., # Old Z
2753  // %2 = ld ...
2754  // %3 = ld ...
2755  // DBG_VALUE %3, A, ..., # Z
2756  // -> %4 = ld ...

2757
2758  // Nothing happens for %4 and the algorithm exits having processed the entire
2759  // Basic Block.
2760  // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2761  // InstrMap = {X -> 2, Z -> 3}
2762  // DBG_VALUE $noreg, A, .... # X
2763  // DBG_VALUE 0, A, ... # Y
2764  // DBG_VALUE $noreg, A, ..., # Old Z
2765  // %2 = ld ...
2766  // %3 = ld ...
2767  // DBG_VALUE %3, A, ..., # Z
2768  // %4 = ld ...

2769
2770  // This map is used to track the relationship between
2771  // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2772  // debug information for that Debug Variable.
2773  SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2774  // This map is used to track the relationship between a DBG_VALUE or
2775  // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2776  SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2777  for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2778    MachineInstr &MI = *MBBI;
2779
2780    auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2781      auto RegIt = RegisterMap.find(Reg);
2782      if (RegIt == RegisterMap.end())
2783        return;
2784      auto &InstrVec = RegIt->getSecond();
2785      InstrVec.push_back(&MI);
2786      InstrMap[&MI].push_back(Reg);
2787    };
2788
2789    if (MI.isDebugValue()) {
2790      assert(MI.getDebugVariable() &&
2791             "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2792
2794      // If the first operand is a register and it exists in the RegisterMap, we
2795      // know this is a DBG_VALUE that uses the result of a load that was moved,
2796      // and is therefore a candidate to also be moved, add it to the
2797      // RegisterMap and InstrMap.
2798      forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2799        PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2800      });
2801
2802      // If the current DBG_VALUE describes the same variable as one of the
2803      // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2804      // undef. Moving one DBG_VALUE past another would result in the variable's
2805      // value going back in time when stepping through the block in the
2806      // debugger.
2807      auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2808      if (InstrIt != DbgValueSinkCandidates.end()) {
2809        auto *Instr = InstrIt->getSecond();
2810        auto RegIt = InstrMap.find(Instr);
2811        if (RegIt != InstrMap.end()) {
2812          const auto &RegVec = RegIt->getSecond();
2813          // For every Register in the RegVec, remove the MachineInstr in the
2814          // RegisterMap that describes the DbgVar.
2815          for (auto &Reg : RegVec) {
2816            auto RegIt = RegisterMap.find(Reg);
2817            if (RegIt == RegisterMap.end())
2818              continue;
2819            auto &InstrVec = RegIt->getSecond();
2820            auto IsDbgVar = [&](MachineInstr *I) -> bool {
2822              return Var == DbgVar;
2823            };
2824
2825            llvm::erase_if(InstrVec, IsDbgVar);
2826          }
2828                               [&](MachineOperand &Op) { Op.setReg(0); });
2829        }
2830      }
2831      DbgValueSinkCandidates[DbgVar] = &MI;
2832    } else {
2833      // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2834      // then move that DBG_VALUE to below the load.
2835      auto Opc = MI.getOpcode();
2836      if (!isLoadSingle(Opc))
2837        continue;
2838      auto Reg = MI.getOperand(0).getReg();
2839      auto RegIt = RegisterMap.find(Reg);
2840      if (RegIt == RegisterMap.end())
2841        continue;
2842      auto &DbgInstrVec = RegIt->getSecond();
2843      if (!DbgInstrVec.size())
2844        continue;
2845      for (auto *DbgInstr : DbgInstrVec) {
        // Clone the DBG_VALUE directly after the load that defines its
        // register, then neutralize the original copy.
2846        MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2847        auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2848        MBB->insert(InsertPos, ClonedMI);
2849        MBBI++;
2850        // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2851        // that was moved.
2852        auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2853        // Erase DbgVar from DbgValueSinkCandidates if still present. If the
2854        // instruction is a DBG_VALUE_LIST, it may have already been erased from
2855        // DbgValueSinkCandidates.
2856        DbgValueSinkCandidates.erase(DbgVar);
2857        // Zero out original dbg instr
2858        forEachDbgRegOperand(DbgInstr,
2859                             [&](MachineOperand &Op) { Op.setReg(0); });
2860        // Update RegisterMap with ClonedMI because it might have to be moved
2861        // again.
2862        if (DbgInstr->isDebugValueList())
2863          updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2864                                                    DbgInstr);
2865      }
2866    }
2867  }
2868  return RetVal;
2869}
2870
2871// Get the Base register operand index from the memory access MachineInst if we
2872// should attempt to distribute postinc on it. Return -1 if not of a valid
2873// instruction type. If it returns an index, it is assumed that instruction is a
2874// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
2876 switch (MI.getOpcode()) {
2877 case ARM::MVE_VLDRBS16:
2878 case ARM::MVE_VLDRBS32:
2879 case ARM::MVE_VLDRBU16:
2880 case ARM::MVE_VLDRBU32:
2881 case ARM::MVE_VLDRHS32:
2882 case ARM::MVE_VLDRHU32:
2883 case ARM::MVE_VLDRBU8:
2884 case ARM::MVE_VLDRHU16:
2885 case ARM::MVE_VLDRWU32:
2886 case ARM::MVE_VSTRB16:
2887 case ARM::MVE_VSTRB32:
2888 case ARM::MVE_VSTRH32:
2889 case ARM::MVE_VSTRBU8:
2890 case ARM::MVE_VSTRHU16:
2891 case ARM::MVE_VSTRWU32:
2892 case ARM::t2LDRHi8:
2893 case ARM::t2LDRHi12:
2894 case ARM::t2LDRSHi8:
2895 case ARM::t2LDRSHi12:
2896 case ARM::t2LDRBi8:
2897 case ARM::t2LDRBi12:
2898 case ARM::t2LDRSBi8:
2899 case ARM::t2LDRSBi12:
2900 case ARM::t2STRBi8:
2901 case ARM::t2STRBi12:
2902 case ARM::t2STRHi8:
2903 case ARM::t2STRHi12:
2904 return 1;
2905 case ARM::MVE_VLDRBS16_post:
2906 case ARM::MVE_VLDRBS32_post:
2907 case ARM::MVE_VLDRBU16_post:
2908 case ARM::MVE_VLDRBU32_post:
2909 case ARM::MVE_VLDRHS32_post:
2910 case ARM::MVE_VLDRHU32_post:
2911 case ARM::MVE_VLDRBU8_post:
2912 case ARM::MVE_VLDRHU16_post:
2913 case ARM::MVE_VLDRWU32_post:
2914 case ARM::MVE_VSTRB16_post:
2915 case ARM::MVE_VSTRB32_post:
2916 case ARM::MVE_VSTRH32_post:
2917 case ARM::MVE_VSTRBU8_post:
2918 case ARM::MVE_VSTRHU16_post:
2919 case ARM::MVE_VSTRWU32_post:
2920 case ARM::MVE_VLDRBS16_pre:
2921 case ARM::MVE_VLDRBS32_pre:
2922 case ARM::MVE_VLDRBU16_pre:
2923 case ARM::MVE_VLDRBU32_pre:
2924 case ARM::MVE_VLDRHS32_pre:
2925 case ARM::MVE_VLDRHU32_pre:
2926 case ARM::MVE_VLDRBU8_pre:
2927 case ARM::MVE_VLDRHU16_pre:
2928 case ARM::MVE_VLDRWU32_pre:
2929 case ARM::MVE_VSTRB16_pre:
2930 case ARM::MVE_VSTRB32_pre:
2931 case ARM::MVE_VSTRH32_pre:
2932 case ARM::MVE_VSTRBU8_pre:
2933 case ARM::MVE_VSTRHU16_pre:
2934 case ARM::MVE_VSTRWU32_pre:
2935 return 2;
2936 }
2937 return -1;
2938}
2939
2941 switch (MI.getOpcode()) {
2942 case ARM::MVE_VLDRBS16_post:
2943 case ARM::MVE_VLDRBS32_post:
2944 case ARM::MVE_VLDRBU16_post:
2945 case ARM::MVE_VLDRBU32_post:
2946 case ARM::MVE_VLDRHS32_post:
2947 case ARM::MVE_VLDRHU32_post:
2948 case ARM::MVE_VLDRBU8_post:
2949 case ARM::MVE_VLDRHU16_post:
2950 case ARM::MVE_VLDRWU32_post:
2951 case ARM::MVE_VSTRB16_post:
2952 case ARM::MVE_VSTRB32_post:
2953 case ARM::MVE_VSTRH32_post:
2954 case ARM::MVE_VSTRBU8_post:
2955 case ARM::MVE_VSTRHU16_post:
2956 case ARM::MVE_VSTRWU32_post:
2957 return true;
2958 }
2959 return false;
2960}
2961
2963 switch (MI.getOpcode()) {
2964 case ARM::MVE_VLDRBS16_pre:
2965 case ARM::MVE_VLDRBS32_pre:
2966 case ARM::MVE_VLDRBU16_pre:
2967 case ARM::MVE_VLDRBU32_pre:
2968 case ARM::MVE_VLDRHS32_pre:
2969 case ARM::MVE_VLDRHU32_pre:
2970 case ARM::MVE_VLDRBU8_pre:
2971 case ARM::MVE_VLDRHU16_pre:
2972 case ARM::MVE_VLDRWU32_pre:
2973 case ARM::MVE_VSTRB16_pre:
2974 case ARM::MVE_VSTRB32_pre:
2975 case ARM::MVE_VSTRH32_pre:
2976 case ARM::MVE_VSTRBU8_pre:
2977 case ARM::MVE_VSTRHU16_pre:
2978 case ARM::MVE_VSTRWU32_pre:
2979 return true;
2980 }
2981 return false;
2982}
2983
2984// Given a memory access Opcode, check that the give Imm would be a valid Offset
2985// for this instruction (same as isLegalAddressImm), Or if the instruction
2986// could be easily converted to one where that was valid. For example converting
2987// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
2988// AdjustBaseAndOffset below.
2989static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
2990 const TargetInstrInfo *TII,
2991 int &CodesizeEstimate) {
2992 if (isLegalAddressImm(Opcode, Imm, TII))
2993 return true;
2994
2995 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
2996 const MCInstrDesc &Desc = TII->get(Opcode);
2997 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2998 switch (AddrMode) {
3000 CodesizeEstimate += 1;
3001 return Imm < 0 && -Imm < ((1 << 8) * 1);
3002 }
3003 return false;
3004}
3005
3006// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
3007// by -Offset. This can either happen in-place or be a replacement as MI is
3008// converted to another instruction type.
3010 int Offset, const TargetInstrInfo *TII,
3011 const TargetRegisterInfo *TRI) {
3012 // Set the Base reg
3013 unsigned BaseOp = getBaseOperandIndex(*MI);
3014 MI->getOperand(BaseOp).setReg(NewBaseReg);
3015 // and constrain the reg class to that required by the instruction.
3016 MachineFunction *MF = MI->getMF();
3018 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3019 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
3020 MRI.constrainRegClass(NewBaseReg, TRC);
3021
3022 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3023 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3024 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3025 else {
3026 unsigned ConvOpcode;
3027 switch (MI->getOpcode()) {
3028 case ARM::t2LDRHi12:
3029 ConvOpcode = ARM::t2LDRHi8;
3030 break;
3031 case ARM::t2LDRSHi12:
3032 ConvOpcode = ARM::t2LDRSHi8;
3033 break;
3034 case ARM::t2LDRBi12:
3035 ConvOpcode = ARM::t2LDRBi8;
3036 break;
3037 case ARM::t2LDRSBi12:
3038 ConvOpcode = ARM::t2LDRSBi8;
3039 break;
3040 case ARM::t2STRHi12:
3041 ConvOpcode = ARM::t2STRHi8;
3042 break;
3043 case ARM::t2STRBi12:
3044 ConvOpcode = ARM::t2STRBi8;
3045 break;
3046 default:
3047 llvm_unreachable("Unhandled convertable opcode");
3048 }
3049 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3050 "Illegal Address Immediate after convert!");
3051
3052 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3053 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3054 .add(MI->getOperand(0))
3055 .add(MI->getOperand(1))
3056 .addImm(OldOffset - Offset)
3057 .add(MI->getOperand(3))
3058 .add(MI->getOperand(4))
3059 .cloneMemRefs(*MI);
3060 MI->eraseFromParent();
3061 }
3062}
3063
3065 Register NewReg,
3066 const TargetInstrInfo *TII,
3067 const TargetRegisterInfo *TRI) {
3068 MachineFunction *MF = MI->getMF();
3070
3071 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3072 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3073
3074 const MCInstrDesc &MCID = TII->get(NewOpcode);
3075 // Constrain the def register class
3076 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
3077 MRI.constrainRegClass(NewReg, TRC);
3078 // And do the same for the base operand
3079 TRC = TII->getRegClass(MCID, 2);
3080 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3081
3082 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3083 switch (AddrMode) {
3087 // Any MVE load/store
3088 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3089 .addReg(NewReg, RegState::Define)
3090 .add(MI->getOperand(0))
3091 .add(MI->getOperand(1))
3092 .addImm(Offset)
3093 .add(MI->getOperand(3))
3094 .add(MI->getOperand(4))
3095 .add(MI->getOperand(5))
3096 .cloneMemRefs(*MI);
3098 if (MI->mayLoad()) {
3099 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3100 .add(MI->getOperand(0))
3101 .addReg(NewReg, RegState::Define)
3102 .add(MI->getOperand(1))
3103 .addImm(Offset)
3104 .add(MI->getOperand(3))
3105 .add(MI->getOperand(4))
3106 .cloneMemRefs(*MI);
3107 } else {
3108 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3109 .addReg(NewReg, RegState::Define)
3110 .add(MI->getOperand(0))
3111 .add(MI->getOperand(1))
3112 .addImm(Offset)
3113 .add(MI->getOperand(3))
3114 .add(MI->getOperand(4))
3115 .cloneMemRefs(*MI);
3116 }
3117 default:
3118 llvm_unreachable("Unhandled createPostIncLoadStore");
3119 }
3120}
3121
3122// Given a Base Register, optimise the load/store uses to attempt to create more
3123// post-inc accesses and less register moves. We do this by taking zero offset
3124// loads/stores with an add, and convert them to a postinc load/store of the
3125// same type. Any subsequent accesses will be adjusted to use and account for
3126// the post-inc value.
3127// For example:
3128// LDR #0 LDR_POSTINC #16
3129// LDR #4 LDR #-12
3130// LDR #8 LDR #-8
3131// LDR #12 LDR #-4
3132// ADD #16
3133//
3134// At the same time if we do not find an increment but do find an existing
3135// pre/post inc instruction, we can still adjust the offsets of subsequent
3136// instructions to save the register move that would otherwise be needed for the
3137// in-place increment.
3138bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3139 // We are looking for:
3140 // One zero offset load/store that can become postinc
3141 MachineInstr *BaseAccess = nullptr;
3142 MachineInstr *PrePostInc = nullptr;
3143 // An increment that can be folded in
3144 MachineInstr *Increment = nullptr;
3145 // Other accesses after BaseAccess that will need to be updated to use the
3146 // postinc value.
3147 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3148 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3149 if (!Increment && getAddSubImmediate(Use) != 0) {
3150 Increment = &Use;
3151 continue;
3152 }
3153
3154 int BaseOp = getBaseOperandIndex(Use);
3155 if (BaseOp == -1)
3156 return false;
3157
3158 if (!Use.getOperand(BaseOp).isReg() ||
3159 Use.getOperand(BaseOp).getReg() != Base)
3160 return false;
3161 if (isPreIndex(Use) || isPostIndex(Use))
3162 PrePostInc = &Use;
3163 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3164 BaseAccess = &Use;
3165 else
3166 OtherAccesses.insert(&Use);
3167 }
3168
3169 int IncrementOffset;
3170 Register NewBaseReg;
3171 if (BaseAccess && Increment) {
3172 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3173 return false;
3174 Register PredReg;
3175 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3177 return false;
3178
3179 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3180 << Base.virtRegIndex() << "\n");
3181
3182 // Make sure that Increment has no uses before BaseAccess that are not PHI
3183 // uses.
3184 for (MachineInstr &Use :
3185 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3186 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3187 !DT->dominates(BaseAccess, &Use))) {
3188 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3189 return false;
3190 }
3191 }
3192
3193 // Make sure that Increment can be folded into Base
3194 IncrementOffset = getAddSubImmediate(*Increment);
3195 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3196 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3197 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3198 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3199 return false;
3200 }
3201 }
3202 else if (PrePostInc) {
3203 // If we already have a pre/post index load/store then set BaseAccess,
3204 // IncrementOffset and NewBaseReg to the values it already produces,
3205 // allowing us to update and subsequent uses of BaseOp reg with the
3206 // incremented value.
3207 if (Increment)
3208 return false;
3209
3210 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3211 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3212 int BaseOp = getBaseOperandIndex(*PrePostInc);
3213 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3214 BaseAccess = PrePostInc;
3215 NewBaseReg = PrePostInc->getOperand(0).getReg();
3216 }
3217 else
3218 return false;
3219
3220 // And make sure that the negative value of increment can be added to all
3221 // other offsets after the BaseAccess. We rely on either
3222 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3223 // to keep things simple.
3224 // This also adds a simple codesize metric, to detect if an instruction (like
3225 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3226 // cannot because it is converted to something else (t2LDRBi8). We start this
3227 // at -1 for the gain from removing the increment.
3228 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3229 int CodesizeEstimate = -1;
3230 for (auto *Use : OtherAccesses) {
3231 if (DT->dominates(BaseAccess, Use)) {
3232 SuccessorAccesses.insert(Use);
3233 unsigned BaseOp = getBaseOperandIndex(*Use);
3234 if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
3235 Use->getOperand(BaseOp + 1).getImm() -
3236 IncrementOffset,
3237 TII, CodesizeEstimate)) {
3238 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3239 return false;
3240 }
3241 } else if (!DT->dominates(Use, BaseAccess)) {
3242 LLVM_DEBUG(
3243 dbgs() << " Unknown dominance relation between Base and Use\n");
3244 return false;
3245 }
3246 }
3247 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3248 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3249 return false;
3250 }
3251
3252 if (!PrePostInc) {
3253 // Replace BaseAccess with a post inc
3254 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3255 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3256 NewBaseReg = Increment->getOperand(0).getReg();
3257 MachineInstr *BaseAccessPost =
3258 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3259 BaseAccess->eraseFromParent();
3260 Increment->eraseFromParent();
3261 (void)BaseAccessPost;
3262 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3263 }
3264
3265 for (auto *Use : SuccessorAccesses) {
3266 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3267 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3268 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3269 }
3270
3271 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3272 // remain.
3273 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3274 Op.setIsKill(false);
3275 return true;
3276}
3277
3278bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3279 bool Changed = false;
3280 SmallSetVector<Register, 4> Visited;
3281 for (auto &MBB : *MF) {
3282 for (auto &MI : MBB) {
3283 int BaseOp = getBaseOperandIndex(MI);
3284 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3285 continue;
3286
3287 Register Base = MI.getOperand(BaseOp).getReg();
3288 if (!Base.isVirtual())
3289 continue;
3290
3291 Visited.insert(Base);
3292 }
3293 }
3294
3295 for (auto Base : Visited)
3296 Changed |= DistributeIncrements(Base);
3297
3298 return Changed;
3299}
3300
3301/// Returns an instance of the load / store optimization pass.
3303 if (PreAlloc)
3304 return new ARMPreAllocLoadStoreOpt();
3305 return new ARMLoadStoreOpt();
3306}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasMinSize() const
bool isCortexM3() const
Align getDualLoadStoreAlignment() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
Definition Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ARM
Windows AXP64.
Definition MCAsmInfo.h:47
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
This namespace contains all of the command line option processing machinery.
Definition CommandLine.h:52
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BBIterator iterator
Definition BasicBlock.h:87
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1626
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
Definition VE.h:377
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1910
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39