LLVM 23.0.0git
ARMLoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains a pass that performs load / store related peephole
10/// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMISelLowering.h"
19#include "ARMSubtarget.h"
22#include "Utils/ARMBaseInfo.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/SetVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/Type.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
60#include "llvm/Support/Debug.h"
63#include <cassert>
64#include <cstddef>
65#include <cstdlib>
66#include <iterator>
67#include <limits>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "arm-ldst-opt"
73
74STATISTIC(NumLDMGened , "Number of ldm instructions generated");
75STATISTIC(NumSTMGened , "Number of stm instructions generated");
76STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
77STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
78STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
79STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
80STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
81STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
82STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
83STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
84STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
85
86/// This switch disables formation of double/multi instructions that could
87/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
88/// disabled. This can be used to create libraries that are robust even when
89/// users provoke undefined behaviour by supplying misaligned pointers.
90/// \see mayCombineMisaligned()
91static cl::opt<bool>
92AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
93 cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
94
95#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
96
97namespace {
98
99/// Post- register allocation pass the combine load / store instructions to
100/// form ldm / stm instructions.
101struct ARMLoadStoreOpt {
102 const MachineFunction *MF;
103 const TargetInstrInfo *TII;
104 const TargetRegisterInfo *TRI;
105 const ARMSubtarget *STI;
106 const TargetLowering *TL;
107 ARMFunctionInfo *AFI;
109 RegisterClassInfo RegClassInfo;
111 bool LiveRegsValid;
112 bool RegClassInfoValid;
113 bool isThumb1, isThumb2;
114
115 bool runOnMachineFunction(MachineFunction &Fn);
116
117private:
118 /// A set of load/store MachineInstrs with same base register sorted by
119 /// offset.
120 struct MemOpQueueEntry {
122 int Offset; ///< Load/Store offset.
123 unsigned Position; ///< Position as counted from end of basic block.
124
125 MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
126 : MI(&MI), Offset(Offset), Position(Position) {}
127 };
128 using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
129
130 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
131 /// merged into a LDM/STM.
132 struct MergeCandidate {
133 /// List of instructions ordered by load/store offset.
135
136 /// Index in Instrs of the instruction being latest in the schedule.
137 unsigned LatestMIIdx;
138
139 /// Index in Instrs of the instruction being earliest in the schedule.
140 unsigned EarliestMIIdx;
141
142 /// Index into the basic block where the merged instruction will be
143 /// inserted. (See MemOpQueueEntry.Position)
144 unsigned InsertPos;
145
146 /// Whether the instructions can be merged into a ldm/stm instruction.
147 bool CanMergeToLSMulti;
148
149 /// Whether the instructions can be merged into a ldrd/strd instruction.
150 bool CanMergeToLSDouble;
151 };
154 SmallVector<MachineInstr *, 4> MergeBaseCandidates;
155
156 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
158 unsigned findFreeReg(const TargetRegisterClass &RegClass);
159 void UpdateBaseRegUses(MachineBasicBlock &MBB,
161 unsigned Base, unsigned WordOffset,
162 ARMCC::CondCodes Pred, unsigned PredReg);
163 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
164 MachineBasicBlock::iterator InsertBefore,
165 int Offset, unsigned Base, bool BaseKill,
166 unsigned Opcode, ARMCC::CondCodes Pred,
167 unsigned PredReg, const DebugLoc &DL,
168 ArrayRef<std::pair<unsigned, bool>> Regs,
170 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
171 MachineBasicBlock::iterator InsertBefore,
172 int Offset, unsigned Base, bool BaseKill,
173 unsigned Opcode, ARMCC::CondCodes Pred,
174 unsigned PredReg, const DebugLoc &DL,
175 ArrayRef<std::pair<unsigned, bool>> Regs,
176 ArrayRef<MachineInstr *> Instrs) const;
177 void FormCandidates(const MemOpQueue &MemOps);
178 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
179 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
181 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
182 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
183 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
184 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
185 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
186 bool CombineMovBx(MachineBasicBlock &MBB);
187};
188
189struct ARMLoadStoreOptLegacy : public MachineFunctionPass {
190 static char ID;
191
192 ARMLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
193
194 bool runOnMachineFunction(MachineFunction &Fn) override;
195
196 MachineFunctionProperties getRequiredProperties() const override {
197 return MachineFunctionProperties().setNoVRegs();
198 }
199
200 StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
201};
202
203char ARMLoadStoreOptLegacy::ID = 0;
204
205} // end anonymous namespace
206
207INITIALIZE_PASS(ARMLoadStoreOptLegacy, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME,
208 false, false)
209
210static bool definesCPSR(const MachineInstr &MI) {
211 for (const auto &MO : MI.operands()) {
212 if (!MO.isReg())
213 continue;
214 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
215 // If the instruction has live CPSR def, then it's not safe to fold it
216 // into load / store.
217 return true;
218 }
219
220 return false;
221}
222
224 unsigned Opcode = MI.getOpcode();
225 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
226 unsigned NumOperands = MI.getDesc().getNumOperands();
227 unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
228
229 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
230 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
231 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
232 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
233 return OffField;
234
235 // Thumb1 immediate offsets are scaled by 4
236 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
237 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
238 return OffField * 4;
239
240 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
241 : ARM_AM::getAM5Offset(OffField) * 4;
242 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
243 : ARM_AM::getAM5Op(OffField);
244
245 if (Op == ARM_AM::sub)
246 return -Offset;
247
248 return Offset;
249}
250
252 return MI.getOperand(1);
253}
254
256 return MI.getOperand(0);
257}
258
260 switch (Opcode) {
261 default: llvm_unreachable("Unhandled opcode!");
262 case ARM::LDRi12:
263 ++NumLDMGened;
264 switch (Mode) {
265 default: llvm_unreachable("Unhandled submode!");
266 case ARM_AM::ia: return ARM::LDMIA;
267 case ARM_AM::da: return ARM::LDMDA;
268 case ARM_AM::db: return ARM::LDMDB;
269 case ARM_AM::ib: return ARM::LDMIB;
270 }
271 case ARM::STRi12:
272 ++NumSTMGened;
273 switch (Mode) {
274 default: llvm_unreachable("Unhandled submode!");
275 case ARM_AM::ia: return ARM::STMIA;
276 case ARM_AM::da: return ARM::STMDA;
277 case ARM_AM::db: return ARM::STMDB;
278 case ARM_AM::ib: return ARM::STMIB;
279 }
280 case ARM::tLDRi:
281 case ARM::tLDRspi:
282 // tLDMIA is writeback-only - unless the base register is in the input
283 // reglist.
284 ++NumLDMGened;
285 switch (Mode) {
286 default: llvm_unreachable("Unhandled submode!");
287 case ARM_AM::ia: return ARM::tLDMIA;
288 }
289 case ARM::tSTRi:
290 case ARM::tSTRspi:
291 // There is no non-writeback tSTMIA either.
292 ++NumSTMGened;
293 switch (Mode) {
294 default: llvm_unreachable("Unhandled submode!");
295 case ARM_AM::ia: return ARM::tSTMIA_UPD;
296 }
297 case ARM::t2LDRi8:
298 case ARM::t2LDRi12:
299 ++NumLDMGened;
300 switch (Mode) {
301 default: llvm_unreachable("Unhandled submode!");
302 case ARM_AM::ia: return ARM::t2LDMIA;
303 case ARM_AM::db: return ARM::t2LDMDB;
304 }
305 case ARM::t2STRi8:
306 case ARM::t2STRi12:
307 ++NumSTMGened;
308 switch (Mode) {
309 default: llvm_unreachable("Unhandled submode!");
310 case ARM_AM::ia: return ARM::t2STMIA;
311 case ARM_AM::db: return ARM::t2STMDB;
312 }
313 case ARM::VLDRS:
314 ++NumVLDMGened;
315 switch (Mode) {
316 default: llvm_unreachable("Unhandled submode!");
317 case ARM_AM::ia: return ARM::VLDMSIA;
318 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
319 }
320 case ARM::VSTRS:
321 ++NumVSTMGened;
322 switch (Mode) {
323 default: llvm_unreachable("Unhandled submode!");
324 case ARM_AM::ia: return ARM::VSTMSIA;
325 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
326 }
327 case ARM::VLDRD:
328 ++NumVLDMGened;
329 switch (Mode) {
330 default: llvm_unreachable("Unhandled submode!");
331 case ARM_AM::ia: return ARM::VLDMDIA;
332 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
333 }
334 case ARM::VSTRD:
335 ++NumVSTMGened;
336 switch (Mode) {
337 default: llvm_unreachable("Unhandled submode!");
338 case ARM_AM::ia: return ARM::VSTMDIA;
339 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
340 }
341 }
342}
343
345 switch (Opcode) {
346 default: llvm_unreachable("Unhandled opcode!");
347 case ARM::LDMIA_RET:
348 case ARM::LDMIA:
349 case ARM::LDMIA_UPD:
350 case ARM::STMIA:
351 case ARM::STMIA_UPD:
352 case ARM::tLDMIA:
353 case ARM::tLDMIA_UPD:
354 case ARM::tSTMIA_UPD:
355 case ARM::t2LDMIA_RET:
356 case ARM::t2LDMIA:
357 case ARM::t2LDMIA_UPD:
358 case ARM::t2STMIA:
359 case ARM::t2STMIA_UPD:
360 case ARM::VLDMSIA:
361 case ARM::VLDMSIA_UPD:
362 case ARM::VSTMSIA:
363 case ARM::VSTMSIA_UPD:
364 case ARM::VLDMDIA:
365 case ARM::VLDMDIA_UPD:
366 case ARM::VSTMDIA:
367 case ARM::VSTMDIA_UPD:
368 return ARM_AM::ia;
369
370 case ARM::LDMDA:
371 case ARM::LDMDA_UPD:
372 case ARM::STMDA:
373 case ARM::STMDA_UPD:
374 return ARM_AM::da;
375
376 case ARM::LDMDB:
377 case ARM::LDMDB_UPD:
378 case ARM::STMDB:
379 case ARM::STMDB_UPD:
380 case ARM::t2LDMDB:
381 case ARM::t2LDMDB_UPD:
382 case ARM::t2STMDB:
383 case ARM::t2STMDB_UPD:
384 case ARM::VLDMSDB_UPD:
385 case ARM::VSTMSDB_UPD:
386 case ARM::VLDMDDB_UPD:
387 case ARM::VSTMDDB_UPD:
388 return ARM_AM::db;
389
390 case ARM::LDMIB:
391 case ARM::LDMIB_UPD:
392 case ARM::STMIB:
393 case ARM::STMIB_UPD:
394 return ARM_AM::ib;
395 }
396}
397
398static bool isT1i32Load(unsigned Opc) {
399 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
400}
401
402static bool isT2i32Load(unsigned Opc) {
403 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
404}
405
406static bool isi32Load(unsigned Opc) {
407 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
408}
409
410static bool isT1i32Store(unsigned Opc) {
411 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
412}
413
414static bool isT2i32Store(unsigned Opc) {
415 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
416}
417
418static bool isi32Store(unsigned Opc) {
419 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
420}
421
422static bool isLoadSingle(unsigned Opc) {
423 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
424}
425
426static unsigned getImmScale(unsigned Opc) {
427 switch (Opc) {
428 default: llvm_unreachable("Unhandled opcode!");
429 case ARM::tLDRi:
430 case ARM::tSTRi:
431 case ARM::tLDRspi:
432 case ARM::tSTRspi:
433 return 1;
434 case ARM::tLDRHi:
435 case ARM::tSTRHi:
436 return 2;
437 case ARM::tLDRBi:
438 case ARM::tSTRBi:
439 return 4;
440 }
441}
442
444 switch (MI->getOpcode()) {
445 default: return 0;
446 case ARM::LDRi12:
447 case ARM::STRi12:
448 case ARM::tLDRi:
449 case ARM::tSTRi:
450 case ARM::tLDRspi:
451 case ARM::tSTRspi:
452 case ARM::t2LDRi8:
453 case ARM::t2LDRi12:
454 case ARM::t2STRi8:
455 case ARM::t2STRi12:
456 case ARM::VLDRS:
457 case ARM::VSTRS:
458 return 4;
459 case ARM::VLDRD:
460 case ARM::VSTRD:
461 return 8;
462 case ARM::LDMIA:
463 case ARM::LDMDA:
464 case ARM::LDMDB:
465 case ARM::LDMIB:
466 case ARM::STMIA:
467 case ARM::STMDA:
468 case ARM::STMDB:
469 case ARM::STMIB:
470 case ARM::tLDMIA:
471 case ARM::tLDMIA_UPD:
472 case ARM::tSTMIA_UPD:
473 case ARM::t2LDMIA:
474 case ARM::t2LDMDB:
475 case ARM::t2STMIA:
476 case ARM::t2STMDB:
477 case ARM::VLDMSIA:
478 case ARM::VSTMSIA:
479 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
480 case ARM::VLDMDIA:
481 case ARM::VSTMDIA:
482 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
483 }
484}
485
486/// Update future uses of the base register with the offset introduced
487/// due to writeback. This function only works on Thumb1.
488void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
490 const DebugLoc &DL, unsigned Base,
491 unsigned WordOffset,
492 ARMCC::CondCodes Pred,
493 unsigned PredReg) {
494 assert(isThumb1 && "Can only update base register uses for Thumb1!");
495 // Start updating any instructions with immediate offsets. Insert a SUB before
496 // the first non-updateable instruction (if any).
497 for (; MBBI != MBB.end(); ++MBBI) {
498 bool InsertSub = false;
499 unsigned Opc = MBBI->getOpcode();
500
501 if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
502 int Offset;
503 bool IsLoad =
504 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
505 bool IsStore =
506 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
507
508 if (IsLoad || IsStore) {
509 // Loads and stores with immediate offsets can be updated, but only if
510 // the new offset isn't negative.
511 // The MachineOperand containing the offset immediate is the last one
512 // before predicates.
513 MachineOperand &MO =
514 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
515 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
516 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
517
518 // If storing the base register, it needs to be reset first.
519 Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
520
521 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
522 MO.setImm(Offset);
523 else
524 InsertSub = true;
525 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
526 !definesCPSR(*MBBI)) {
527 // SUBS/ADDS using this register, with a dead def of the CPSR.
528 // Merge it with the update; if the merged offset is too large,
529 // insert a new sub instead.
530 MachineOperand &MO =
531 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
532 Offset = (Opc == ARM::tSUBi8) ?
533 MO.getImm() + WordOffset * 4 :
534 MO.getImm() - WordOffset * 4 ;
535 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
536 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
537 // Offset == 0.
538 MO.setImm(Offset);
539 // The base register has now been reset, so exit early.
540 return;
541 } else {
542 InsertSub = true;
543 }
544 } else {
545 // Can't update the instruction.
546 InsertSub = true;
547 }
548 } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
549 // Since SUBS sets the condition flags, we can't place the base reset
550 // after an instruction that has a live CPSR def.
551 // The base register might also contain an argument for a function call.
552 InsertSub = true;
553 }
554
555 if (InsertSub) {
556 // An instruction above couldn't be updated, so insert a sub.
557 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
558 .add(t1CondCodeOp(true))
559 .addReg(Base)
560 .addImm(WordOffset * 4)
561 .addImm(Pred)
562 .addReg(PredReg);
563 return;
564 }
565
566 if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
567 MBBI->definesRegister(Base, /*TRI=*/nullptr))
568 // Register got killed. Stop updating.
569 return;
570 }
571
572 // End of block was reached.
573 if (!MBB.succ_empty()) {
574 // FIXME: Because of a bug, live registers are sometimes missing from
575 // the successor blocks' live-in sets. This means we can't trust that
576 // information and *always* have to reset at the end of a block.
577 // See PR21029.
578 if (MBBI != MBB.end()) --MBBI;
579 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
580 .add(t1CondCodeOp(true))
581 .addReg(Base)
582 .addImm(WordOffset * 4)
583 .addImm(Pred)
584 .addReg(PredReg);
585 }
586}
587
588/// Return the first register of class \p RegClass that is not in \p Regs.
589unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
590 if (!RegClassInfoValid) {
591 RegClassInfo.runOnMachineFunction(*MF);
592 RegClassInfoValid = true;
593 }
594
595 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
596 if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
597 return Reg;
598 return 0;
599}
600
601/// Compute live registers just before instruction \p Before (in normal schedule
602/// direction). Computes backwards so multiple queries in the same block must
603/// come in reverse order.
604void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
606 // Initialize if we never queried in this block.
607 if (!LiveRegsValid) {
608 LiveRegs.init(*TRI);
609 LiveRegs.addLiveOuts(MBB);
610 LiveRegPos = MBB.end();
611 LiveRegsValid = true;
612 }
613 // Move backward just before the "Before" position.
614 while (LiveRegPos != Before) {
615 --LiveRegPos;
616 if (!LiveRegPos->isDebugInstr())
617 LiveRegs.stepBackward(*LiveRegPos);
618 }
619}
620
621static bool ContainsReg(ArrayRef<std::pair<unsigned, bool>> Regs,
622 unsigned Reg) {
623 for (const std::pair<unsigned, bool> &R : Regs)
624 if (R.first == Reg)
625 return true;
626 return false;
627}
628
629/// Create and insert a LDM or STM with Base as base register and registers in
630/// Regs as the register operands that would be loaded / stored. It returns
631/// true if the transformation is done.
632MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
633 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
634 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
635 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
636 ArrayRef<std::pair<unsigned, bool>> Regs,
638 unsigned NumRegs = Regs.size();
639 assert(NumRegs > 1);
640
641 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
642 // Compute liveness information for that register to make the decision.
643 bool SafeToClobberCPSR = !isThumb1 ||
644 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
646
647 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
648
649 // Exception: If the base register is in the input reglist, Thumb1 LDM is
650 // non-writeback.
651 // It's also not possible to merge an STR of the base register in Thumb1.
652 if (isThumb1 && ContainsReg(Regs, Base)) {
653 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
654 if (Opcode == ARM::tLDRi)
655 Writeback = false;
656 else if (Opcode == ARM::tSTRi)
657 return nullptr;
658 }
659
661 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
662 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
663 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
664
665 if (Offset == 4 && haveIBAndDA) {
667 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
669 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
670 // VLDM/VSTM do not support DB mode without also updating the base reg.
672 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
673 // Check if this is a supported opcode before inserting instructions to
674 // calculate a new base register.
675 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
676
677 // If starting offset isn't zero, insert a MI to materialize a new base.
678 // But only do so if it is cost effective, i.e. merging more than two
679 // loads / stores.
680 if (NumRegs <= 2)
681 return nullptr;
682
683 // On Thumb1, it's not worth materializing a new base register without
684 // clobbering the CPSR (i.e. not using ADDS/SUBS).
685 if (!SafeToClobberCPSR)
686 return nullptr;
687
688 unsigned NewBase;
689 if (isi32Load(Opcode)) {
690 // If it is a load, then just use one of the destination registers
691 // as the new base. Will no longer be writeback in Thumb1.
692 NewBase = Regs[NumRegs-1].first;
693 Writeback = false;
694 } else {
695 // Find a free register that we can use as scratch register.
696 moveLiveRegsBefore(MBB, InsertBefore);
697 // The merged instruction does not exist yet but will use several Regs if
698 // it is a Store.
699 if (!isLoadSingle(Opcode))
700 for (const std::pair<unsigned, bool> &R : Regs)
701 LiveRegs.addReg(R.first);
702
703 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
704 if (NewBase == 0)
705 return nullptr;
706 }
707
708 int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
709 : ARM::t2ADDri)
710 : (isThumb1 && Base == ARM::SP)
711 ? ARM::tADDrSPi
712 : (isThumb1 && Offset < 8)
713 ? ARM::tADDi3
714 : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
715
716 if (Offset < 0) {
717 // FIXME: There are no Thumb1 load/store instructions with negative
718 // offsets. So the Base != ARM::SP might be unnecessary.
719 Offset = -Offset;
720 BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
721 : ARM::t2SUBri)
722 : (isThumb1 && Offset < 8 && Base != ARM::SP)
723 ? ARM::tSUBi3
724 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
725 }
726
727 if (!TL->isLegalAddImmediate(Offset))
728 // FIXME: Try add with register operand?
729 return nullptr; // Probably not worth it then.
730
731 // We can only append a kill flag to the add/sub input if the value is not
732 // used in the register list of the stm as well.
733 bool KillOldBase = BaseKill &&
734 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
735
736 if (isThumb1) {
737 // Thumb1: depending on immediate size, use either
738 // ADDS NewBase, Base, #imm3
739 // or
740 // MOV NewBase, Base
741 // ADDS NewBase, #imm8.
742 if (Base != NewBase &&
743 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
744 // Need to insert a MOV to the new base first.
745 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
746 !STI->hasV6Ops()) {
747 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
748 if (Pred != ARMCC::AL)
749 return nullptr;
750 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
751 .addReg(Base, getKillRegState(KillOldBase));
752 } else
753 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
754 .addReg(Base, getKillRegState(KillOldBase))
755 .add(predOps(Pred, PredReg));
756
757 // The following ADDS/SUBS becomes an update.
758 Base = NewBase;
759 KillOldBase = true;
760 }
761 if (BaseOpc == ARM::tADDrSPi) {
762 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
763 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
764 .addReg(Base, getKillRegState(KillOldBase))
765 .addImm(Offset / 4)
766 .add(predOps(Pred, PredReg));
767 } else
768 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
769 .add(t1CondCodeOp(true))
770 .addReg(Base, getKillRegState(KillOldBase))
771 .addImm(Offset)
772 .add(predOps(Pred, PredReg));
773 } else {
774 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
775 .addReg(Base, getKillRegState(KillOldBase))
776 .addImm(Offset)
777 .add(predOps(Pred, PredReg))
778 .add(condCodeOp());
779 }
780 Base = NewBase;
781 BaseKill = true; // New base is always killed straight away.
782 }
783
784 bool isDef = isLoadSingle(Opcode);
785
786 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
787 // base register writeback.
788 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
789 if (!Opcode)
790 return nullptr;
791
792 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
793 // - There is no writeback (LDM of base register),
794 // - the base register is killed by the merged instruction,
795 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
796 // to reset the base register.
797 // Otherwise, don't merge.
798 // It's safe to return here since the code to materialize a new base register
799 // above is also conditional on SafeToClobberCPSR.
800 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
801 return nullptr;
802
803 MachineInstrBuilder MIB;
804
805 if (Writeback) {
806 assert(isThumb1 && "expected Writeback only inThumb1");
807 if (Opcode == ARM::tLDMIA) {
808 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
809 // Update tLDMIA with writeback if necessary.
810 Opcode = ARM::tLDMIA_UPD;
811 }
812
813 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
814
815 // Thumb1: we might need to set base writeback when building the MI.
816 MIB.addReg(Base, getDefRegState(true))
817 .addReg(Base, getKillRegState(BaseKill));
818
819 // The base isn't dead after a merged instruction with writeback.
820 // Insert a sub instruction after the newly formed instruction to reset.
821 if (!BaseKill)
822 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
823 } else {
824 // No writeback, simply build the MachineInstr.
825 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
826 MIB.addReg(Base, getKillRegState(BaseKill));
827 }
828
829 MIB.addImm(Pred).addReg(PredReg);
830
831 for (const std::pair<unsigned, bool> &R : Regs)
832 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
833
834 MIB.cloneMergedMemRefs(Instrs);
835
836 return MIB.getInstr();
837}
838
839MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
840 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
841 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
842 ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
843 ArrayRef<std::pair<unsigned, bool>> Regs,
844 ArrayRef<MachineInstr*> Instrs) const {
845 bool IsLoad = isi32Load(Opcode);
846 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
847 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
848
849 assert(Regs.size() == 2);
850 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
851 TII->get(LoadStoreOpcode));
852 if (IsLoad) {
853 MIB.addReg(Regs[0].first, RegState::Define)
854 .addReg(Regs[1].first, RegState::Define);
855 } else {
856 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
857 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
858 }
859 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
860 MIB.cloneMergedMemRefs(Instrs);
861 return MIB.getInstr();
862}
863
864/// Call MergeOps and update MemOps and merges accordingly on success.
865MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
866 const MachineInstr *First = Cand.Instrs.front();
867 unsigned Opcode = First->getOpcode();
868 bool IsLoad = isLoadSingle(Opcode);
870 SmallVector<unsigned, 4> ImpDefs;
871 DenseSet<unsigned> KilledRegs;
872 DenseSet<unsigned> UsedRegs;
873 // Determine list of registers and list of implicit super-register defs.
874 for (const MachineInstr *MI : Cand.Instrs) {
875 const MachineOperand &MO = getLoadStoreRegOp(*MI);
876 Register Reg = MO.getReg();
877 bool IsKill = MO.isKill();
878 if (IsKill)
879 KilledRegs.insert(Reg);
880 Regs.push_back(std::make_pair(Reg, IsKill));
881 UsedRegs.insert(Reg);
882
883 if (IsLoad) {
884 // Collect any implicit defs of super-registers, after merging we can't
885 // be sure anymore that we properly preserved these live ranges and must
886 // removed these implicit operands.
887 for (const MachineOperand &MO : MI->implicit_operands()) {
888 if (!MO.isReg() || !MO.isDef() || MO.isDead())
889 continue;
890 assert(MO.isImplicit());
891 Register DefReg = MO.getReg();
892
893 if (is_contained(ImpDefs, DefReg))
894 continue;
895 // We can ignore cases where the super-reg is read and written.
896 if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
897 continue;
898 ImpDefs.push_back(DefReg);
899 }
900 }
901 }
902
903 // Attempt the merge.
905
906 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
907 iterator InsertBefore = std::next(iterator(LatestMI));
908 MachineBasicBlock &MBB = *LatestMI->getParent();
909 unsigned Offset = getMemoryOpOffset(*First);
911 bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
912 Register PredReg;
913 ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
914 DebugLoc DL = First->getDebugLoc();
915 MachineInstr *Merged = nullptr;
916 if (Cand.CanMergeToLSDouble)
917 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
918 Opcode, Pred, PredReg, DL, Regs,
919 Cand.Instrs);
920 if (!Merged && Cand.CanMergeToLSMulti)
921 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
922 Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
923 if (!Merged)
924 return nullptr;
925
926 // Determine earliest instruction that will get removed. We then keep an
927 // iterator just above it so the following erases don't invalidated it.
928 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
929 bool EarliestAtBegin = false;
930 if (EarliestI == MBB.begin()) {
931 EarliestAtBegin = true;
932 } else {
933 EarliestI = std::prev(EarliestI);
934 }
935
936 // Remove instructions which have been merged.
937 for (MachineInstr *MI : Cand.Instrs)
938 MBB.erase(MI);
939
940 // Determine range between the earliest removed instruction and the new one.
941 if (EarliestAtBegin)
942 EarliestI = MBB.begin();
943 else
944 EarliestI = std::next(EarliestI);
945 auto FixupRange = make_range(EarliestI, iterator(Merged));
946
947 if (isLoadSingle(Opcode)) {
948 // If the previous loads defined a super-reg, then we have to mark earlier
949 // operands undef; Replicate the super-reg def on the merged instruction.
950 for (MachineInstr &MI : FixupRange) {
951 for (unsigned &ImpDefReg : ImpDefs) {
952 for (MachineOperand &MO : MI.implicit_operands()) {
953 if (!MO.isReg() || MO.getReg() != ImpDefReg)
954 continue;
955 if (MO.readsReg())
956 MO.setIsUndef();
957 else if (MO.isDef())
958 ImpDefReg = 0;
959 }
960 }
961 }
962
963 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
964 for (unsigned ImpDef : ImpDefs)
965 MIB.addReg(ImpDef, RegState::ImplicitDefine);
966 } else {
967 // Remove kill flags: We are possibly storing the values later now.
968 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
969 for (MachineInstr &MI : FixupRange) {
970 for (MachineOperand &MO : MI.uses()) {
971 if (!MO.isReg() || !MO.isKill())
972 continue;
973 if (UsedRegs.count(MO.getReg()))
974 MO.setIsKill(false);
975 }
976 }
977 assert(ImpDefs.empty());
978 }
979
980 return Merged;
981}
982
984 unsigned Value = abs(Offset);
985 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
986 // multiplied by 4.
987 return (Value % 4) == 0 && Value < 1024;
988}
989
990/// Return true for loads/stores that can be combined to a double/multi
991/// operation without increasing the requirements for alignment.
993 const MachineInstr &MI) {
994 // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
995 // difference.
996 unsigned Opcode = MI.getOpcode();
997 if (!isi32Load(Opcode) && !isi32Store(Opcode))
998 return true;
999
1000 // Stack pointer alignment is out of the programmers control so we can trust
1001 // SP-relative loads/stores.
1002 if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
1004 return true;
1005 return false;
1006}
1007
1008/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
1009void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
1010 const MachineInstr *FirstMI = MemOps[0].MI;
1011 unsigned Opcode = FirstMI->getOpcode();
1012 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
1013 unsigned Size = getLSMultipleTransferSize(FirstMI);
1014
1015 unsigned SIndex = 0;
1016 unsigned EIndex = MemOps.size();
1017 do {
1018 // Look at the first instruction.
1019 const MachineInstr *MI = MemOps[SIndex].MI;
1020 int Offset = MemOps[SIndex].Offset;
1021 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1022 Register PReg = PMO.getReg();
1023 unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
1024 : TRI->getEncodingValue(PReg);
1025 unsigned Latest = SIndex;
1026 unsigned Earliest = SIndex;
1027 unsigned Count = 1;
1028 bool CanMergeToLSDouble =
1029 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1030 // ARM errata 602117: LDRD with base in list may result in incorrect base
1031 // register when interrupted or faulted.
1032 if (STI->isCortexM3() && isi32Load(Opcode) &&
1033 PReg == getLoadStoreBaseOp(*MI).getReg())
1034 CanMergeToLSDouble = false;
1035
1036 bool CanMergeToLSMulti = true;
1037 // On swift vldm/vstm starting with an odd register number as that needs
1038 // more uops than single vldrs.
1039 if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1040 CanMergeToLSMulti = false;
1041
1042 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1043 // deprecated; LDM to PC is fine but cannot happen here.
1044 if (PReg == ARM::SP || PReg == ARM::PC)
1045 CanMergeToLSMulti = CanMergeToLSDouble = false;
1046
1047 // Should we be conservative?
1049 CanMergeToLSMulti = CanMergeToLSDouble = false;
1050
1051 // vldm / vstm limit are 32 for S variants, 16 for D variants.
1052 unsigned Limit;
1053 switch (Opcode) {
1054 default:
1055 Limit = UINT_MAX;
1056 break;
1057 case ARM::VLDRD:
1058 case ARM::VSTRD:
1059 Limit = 16;
1060 break;
1061 }
1062
1063 // Merge following instructions where possible.
1064 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1065 int NewOffset = MemOps[I].Offset;
1066 if (NewOffset != Offset + (int)Size)
1067 break;
1068 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1069 Register Reg = MO.getReg();
1070 if (Reg == ARM::SP || Reg == ARM::PC)
1071 break;
1072 if (Count == Limit)
1073 break;
1074
1075 // See if the current load/store may be part of a multi load/store.
1076 unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
1077 : TRI->getEncodingValue(Reg);
1078 bool PartOfLSMulti = CanMergeToLSMulti;
1079 if (PartOfLSMulti) {
1080 // Register numbers must be in ascending order.
1081 if (RegNum <= PRegNum)
1082 PartOfLSMulti = false;
1083 // For VFP / NEON load/store multiples, the registers must be
1084 // consecutive and within the limit on the number of registers per
1085 // instruction.
1086 else if (!isNotVFP && RegNum != PRegNum+1)
1087 PartOfLSMulti = false;
1088 }
1089 // See if the current load/store may be part of a double load/store.
1090 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1091
1092 if (!PartOfLSMulti && !PartOfLSDouble)
1093 break;
1094 CanMergeToLSMulti &= PartOfLSMulti;
1095 CanMergeToLSDouble &= PartOfLSDouble;
1096 // Track MemOp with latest and earliest position (Positions are
1097 // counted in reverse).
1098 unsigned Position = MemOps[I].Position;
1099 if (Position < MemOps[Latest].Position)
1100 Latest = I;
1101 else if (Position > MemOps[Earliest].Position)
1102 Earliest = I;
1103 // Prepare for next MemOp.
1104 Offset += Size;
1105 PRegNum = RegNum;
1106 }
1107
1108 // Form a candidate from the Ops collected so far.
1109 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1110 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1111 Candidate->Instrs.push_back(MemOps[C].MI);
1112 Candidate->LatestMIIdx = Latest - SIndex;
1113 Candidate->EarliestMIIdx = Earliest - SIndex;
1114 Candidate->InsertPos = MemOps[Latest].Position;
1115 if (Count == 1)
1116 CanMergeToLSMulti = CanMergeToLSDouble = false;
1117 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1118 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1119 Candidates.push_back(Candidate);
1120 // Continue after the chain.
1121 SIndex += Count;
1122 } while (SIndex < EIndex);
1123}
1124
1125static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1127 switch (Opc) {
1128 default: llvm_unreachable("Unhandled opcode!");
1129 case ARM::LDMIA:
1130 case ARM::LDMDA:
1131 case ARM::LDMDB:
1132 case ARM::LDMIB:
1133 switch (Mode) {
1134 default: llvm_unreachable("Unhandled submode!");
1135 case ARM_AM::ia: return ARM::LDMIA_UPD;
1136 case ARM_AM::ib: return ARM::LDMIB_UPD;
1137 case ARM_AM::da: return ARM::LDMDA_UPD;
1138 case ARM_AM::db: return ARM::LDMDB_UPD;
1139 }
1140 case ARM::STMIA:
1141 case ARM::STMDA:
1142 case ARM::STMDB:
1143 case ARM::STMIB:
1144 switch (Mode) {
1145 default: llvm_unreachable("Unhandled submode!");
1146 case ARM_AM::ia: return ARM::STMIA_UPD;
1147 case ARM_AM::ib: return ARM::STMIB_UPD;
1148 case ARM_AM::da: return ARM::STMDA_UPD;
1149 case ARM_AM::db: return ARM::STMDB_UPD;
1150 }
1151 case ARM::t2LDMIA:
1152 case ARM::t2LDMDB:
1153 switch (Mode) {
1154 default: llvm_unreachable("Unhandled submode!");
1155 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1156 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1157 }
1158 case ARM::t2STMIA:
1159 case ARM::t2STMDB:
1160 switch (Mode) {
1161 default: llvm_unreachable("Unhandled submode!");
1162 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1163 case ARM_AM::db: return ARM::t2STMDB_UPD;
1164 }
1165 case ARM::VLDMSIA:
1166 switch (Mode) {
1167 default: llvm_unreachable("Unhandled submode!");
1168 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1169 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1170 }
1171 case ARM::VLDMDIA:
1172 switch (Mode) {
1173 default: llvm_unreachable("Unhandled submode!");
1174 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1175 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1176 }
1177 case ARM::VSTMSIA:
1178 switch (Mode) {
1179 default: llvm_unreachable("Unhandled submode!");
1180 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1181 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1182 }
1183 case ARM::VSTMDIA:
1184 switch (Mode) {
1185 default: llvm_unreachable("Unhandled submode!");
1186 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1187 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1188 }
1189 }
1190}
1191
1192/// Check if the given instruction increments or decrements a register and
1193/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1194/// generated by the instruction are possibly read as well.
1196 ARMCC::CondCodes Pred, Register PredReg) {
1197 bool CheckCPSRDef;
1198 int Scale;
1199 switch (MI.getOpcode()) {
1200 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1201 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1202 case ARM::t2SUBri:
1203 case ARM::t2SUBspImm:
1204 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1205 case ARM::t2ADDri:
1206 case ARM::t2ADDspImm:
1207 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1208 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1209 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1210 default: return 0;
1211 }
1212
1213 Register MIPredReg;
1214 if (MI.getOperand(0).getReg() != Reg ||
1215 MI.getOperand(1).getReg() != Reg ||
1216 getInstrPredicate(MI, MIPredReg) != Pred ||
1217 MIPredReg != PredReg)
1218 return 0;
1219
1220 if (CheckCPSRDef && definesCPSR(MI))
1221 return 0;
1222 return MI.getOperand(2).getImm() * Scale;
1223}
1224
1225/// Searches for an increment or decrement of \p Reg before \p MBBI.
1228 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
1229 Offset = 0;
1230 MachineBasicBlock &MBB = *MBBI->getParent();
1231 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1232 MachineBasicBlock::iterator EndMBBI = MBB.end();
1233 if (MBBI == BeginMBBI)
1234 return EndMBBI;
1235
1236 // Skip debug values.
1237 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1238 while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
1239 --PrevMBBI;
1240
1241 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1242 return Offset == 0 ? EndMBBI : PrevMBBI;
1243}
1244
1245/// Searches for a increment or decrement of \p Reg after \p MBBI.
1248 ARMCC::CondCodes Pred, Register PredReg, int &Offset,
1249 const TargetRegisterInfo *TRI) {
1250 Offset = 0;
1251 MachineBasicBlock &MBB = *MBBI->getParent();
1252 MachineBasicBlock::iterator EndMBBI = MBB.end();
1253 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1254 while (NextMBBI != EndMBBI) {
1255 // Skip debug values.
1256 while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
1257 ++NextMBBI;
1258 if (NextMBBI == EndMBBI)
1259 return EndMBBI;
1260
1261 unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1262 if (Off) {
1263 Offset = Off;
1264 return NextMBBI;
1265 }
1266
1267 // SP can only be combined if it is the next instruction after the original
1268 // MBBI, otherwise we may be incrementing the stack pointer (invalidating
1269 // anything below the new pointer) when its frame elements are still in
1270 // use. Other registers can attempt to look further, until a different use
1271 // or def of the register is found.
1272 if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
1273 NextMBBI->definesRegister(Reg, TRI))
1274 return EndMBBI;
1275
1276 ++NextMBBI;
1277 }
1278 return EndMBBI;
1279}
1280
1281/// Fold proceeding/trailing inc/dec of base register into the
1282/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1283///
1284/// stmia rn, <ra, rb, rc>
1285/// rn := rn + 4 * 3;
1286/// =>
1287/// stmia rn!, <ra, rb, rc>
1288///
1289/// rn := rn - 4 * 3;
1290/// ldmia rn, <ra, rb, rc>
1291/// =>
1292/// ldmdb rn!, <ra, rb, rc>
1293bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1294 // Thumb1 is already using updating loads/stores.
1295 if (isThumb1) return false;
1296 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1297
1298 const MachineOperand &BaseOP = MI->getOperand(0);
1299 Register Base = BaseOP.getReg();
1300 bool BaseKill = BaseOP.isKill();
1301 Register PredReg;
1302 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1303 unsigned Opcode = MI->getOpcode();
1304 DebugLoc DL = MI->getDebugLoc();
1305
1306 // Can't use an updating ld/st if the base register is also a dest
1307 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1308 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1309 if (MO.getReg() == Base)
1310 return false;
1311
1312 int Bytes = getLSMultipleTransferSize(MI);
1313 MachineBasicBlock &MBB = *MI->getParent();
1315 int Offset;
1317 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1319 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1320 Mode = ARM_AM::db;
1321 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1322 Mode = ARM_AM::da;
1323 } else {
1324 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1325 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1326 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
1327
1328 // We couldn't find an inc/dec to merge. But if the base is dead, we
1329 // can still change to a writeback form as that will save us 2 bytes
1330 // of code size. It can create WAW hazards though, so only do it if
1331 // we're minimizing code size.
1332 if (!STI->hasMinSize() || !BaseKill)
1333 return false;
1334
1335 bool HighRegsUsed = false;
1336 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
1337 if (MO.getReg() >= ARM::R8) {
1338 HighRegsUsed = true;
1339 break;
1340 }
1341
1342 if (!HighRegsUsed)
1343 MergeInstr = MBB.end();
1344 else
1345 return false;
1346 }
1347 }
1348 if (MergeInstr != MBB.end()) {
1349 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1350 MBB.erase(MergeInstr);
1351 }
1352
1353 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1355 .addReg(Base, getDefRegState(true)) // WB base register
1356 .addReg(Base, getKillRegState(BaseKill))
1357 .addImm(Pred).addReg(PredReg);
1358
1359 // Transfer the rest of operands.
1360 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
1361 MIB.add(MO);
1362
1363 // Transfer memoperands.
1364 MIB.setMemRefs(MI->memoperands());
1365
1366 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1367 MBB.erase(MBBI);
1368 return true;
1369}
1370
1371static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1373 switch (Opc) {
1374 case ARM::LDRi12:
1375 return ARM::LDR_PRE_IMM;
1376 case ARM::STRi12:
1377 return ARM::STR_PRE_IMM;
1378 case ARM::VLDRS:
1379 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1380 case ARM::VLDRD:
1381 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1382 case ARM::VSTRS:
1383 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1384 case ARM::VSTRD:
1385 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1386 case ARM::t2LDRi8:
1387 case ARM::t2LDRi12:
1388 return ARM::t2LDR_PRE;
1389 case ARM::t2STRi8:
1390 case ARM::t2STRi12:
1391 return ARM::t2STR_PRE;
1392 default: llvm_unreachable("Unhandled opcode!");
1393 }
1394}
1395
1396static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1398 switch (Opc) {
1399 case ARM::LDRi12:
1400 return ARM::LDR_POST_IMM;
1401 case ARM::STRi12:
1402 return ARM::STR_POST_IMM;
1403 case ARM::VLDRS:
1404 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1405 case ARM::VLDRD:
1406 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1407 case ARM::VSTRS:
1408 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1409 case ARM::VSTRD:
1410 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1411 case ARM::t2LDRi8:
1412 case ARM::t2LDRi12:
1413 return ARM::t2LDR_POST;
1414 case ARM::t2LDRBi8:
1415 case ARM::t2LDRBi12:
1416 return ARM::t2LDRB_POST;
1417 case ARM::t2LDRSBi8:
1418 case ARM::t2LDRSBi12:
1419 return ARM::t2LDRSB_POST;
1420 case ARM::t2LDRHi8:
1421 case ARM::t2LDRHi12:
1422 return ARM::t2LDRH_POST;
1423 case ARM::t2LDRSHi8:
1424 case ARM::t2LDRSHi12:
1425 return ARM::t2LDRSH_POST;
1426 case ARM::t2STRi8:
1427 case ARM::t2STRi12:
1428 return ARM::t2STR_POST;
1429 case ARM::t2STRBi8:
1430 case ARM::t2STRBi12:
1431 return ARM::t2STRB_POST;
1432 case ARM::t2STRHi8:
1433 case ARM::t2STRHi12:
1434 return ARM::t2STRH_POST;
1435
1436 case ARM::MVE_VLDRBS16:
1437 return ARM::MVE_VLDRBS16_post;
1438 case ARM::MVE_VLDRBS32:
1439 return ARM::MVE_VLDRBS32_post;
1440 case ARM::MVE_VLDRBU16:
1441 return ARM::MVE_VLDRBU16_post;
1442 case ARM::MVE_VLDRBU32:
1443 return ARM::MVE_VLDRBU32_post;
1444 case ARM::MVE_VLDRHS32:
1445 return ARM::MVE_VLDRHS32_post;
1446 case ARM::MVE_VLDRHU32:
1447 return ARM::MVE_VLDRHU32_post;
1448 case ARM::MVE_VLDRBU8:
1449 return ARM::MVE_VLDRBU8_post;
1450 case ARM::MVE_VLDRHU16:
1451 return ARM::MVE_VLDRHU16_post;
1452 case ARM::MVE_VLDRWU32:
1453 return ARM::MVE_VLDRWU32_post;
1454 case ARM::MVE_VSTRB16:
1455 return ARM::MVE_VSTRB16_post;
1456 case ARM::MVE_VSTRB32:
1457 return ARM::MVE_VSTRB32_post;
1458 case ARM::MVE_VSTRH32:
1459 return ARM::MVE_VSTRH32_post;
1460 case ARM::MVE_VSTRBU8:
1461 return ARM::MVE_VSTRBU8_post;
1462 case ARM::MVE_VSTRHU16:
1463 return ARM::MVE_VSTRHU16_post;
1464 case ARM::MVE_VSTRWU32:
1465 return ARM::MVE_VSTRWU32_post;
1466
1467 default: llvm_unreachable("Unhandled opcode!");
1468 }
1469}
1470
1471/// Fold proceeding/trailing inc/dec of base register into the
1472/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1473bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1474 // Thumb1 doesn't have updating LDR/STR.
1475 // FIXME: Use LDM/STM with single register instead.
1476 if (isThumb1) return false;
1477 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
1478
1480 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1481 unsigned Opcode = MI->getOpcode();
1482 DebugLoc DL = MI->getDebugLoc();
1483 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1484 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1485 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1486 if (isi32Load(Opcode) || isi32Store(Opcode))
1487 if (MI->getOperand(2).getImm() != 0)
1488 return false;
1489 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1490 return false;
1491
1492 // Can't do the merge if the destination register is the same as the would-be
1493 // writeback register.
1494 if (MI->getOperand(0).getReg() == Base)
1495 return false;
1496
1497 Register PredReg;
1498 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1499 int Bytes = getLSMultipleTransferSize(MI);
1500 MachineBasicBlock &MBB = *MI->getParent();
1502 int Offset;
1504 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1505 unsigned NewOpc;
1506 if (!isAM5 && Offset == Bytes) {
1507 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1508 } else if (Offset == -Bytes) {
1509 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1510 } else {
1511 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1512 if (MergeInstr == MBB.end())
1513 return false;
1514
1516 if ((isAM5 && Offset != Bytes) ||
1517 (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
1519 if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
1520 return false;
1521 }
1522 }
1523 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1524 MBB.erase(MergeInstr);
1525
1527
1528 bool isLd = isLoadSingle(Opcode);
1529 if (isAM5) {
1530 // VLDM[SD]_UPD, VSTM[SD]_UPD
1531 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1532 // updating load/store-multiple instructions can be used with only one
1533 // register.)
1534 MachineOperand &MO = MI->getOperand(0);
1535 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1536 .addReg(Base, getDefRegState(true)) // WB base register
1537 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1538 .addImm(Pred)
1539 .addReg(PredReg)
1540 .addReg(MO.getReg(), (isLd ? getDefRegState(true)
1541 : getKillRegState(MO.isKill())))
1542 .cloneMemRefs(*MI);
1543 (void)MIB;
1544 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1545 } else if (isLd) {
1546 if (isAM2) {
1547 // LDR_PRE, LDR_POST
1548 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1549 auto MIB =
1550 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1551 .addReg(Base, RegState::Define)
1552 .addReg(Base)
1553 .addImm(Offset)
1554 .addImm(Pred)
1555 .addReg(PredReg)
1556 .cloneMemRefs(*MI);
1557 (void)MIB;
1558 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1559 } else {
1561 auto MIB =
1562 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1563 .addReg(Base, RegState::Define)
1564 .addReg(Base)
1565 .addReg(0)
1566 .addImm(Imm)
1567 .add(predOps(Pred, PredReg))
1568 .cloneMemRefs(*MI);
1569 (void)MIB;
1570 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1571 }
1572 } else {
1573 // t2LDR_PRE, t2LDR_POST
1574 auto MIB =
1575 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1576 .addReg(Base, RegState::Define)
1577 .addReg(Base)
1578 .addImm(Offset)
1579 .add(predOps(Pred, PredReg))
1580 .cloneMemRefs(*MI);
1581 (void)MIB;
1582 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1583 }
1584 } else {
1585 MachineOperand &MO = MI->getOperand(0);
1586 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1587 // the vestigial zero-reg offset register. When that's fixed, this clause
1588 // can be removed entirely.
1589 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1591 // STR_PRE, STR_POST
1592 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1593 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1594 .addReg(Base)
1595 .addReg(0)
1596 .addImm(Imm)
1597 .add(predOps(Pred, PredReg))
1598 .cloneMemRefs(*MI);
1599 (void)MIB;
1600 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1601 } else {
1602 // t2STR_PRE, t2STR_POST
1603 auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1604 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1605 .addReg(Base)
1606 .addImm(Offset)
1607 .add(predOps(Pred, PredReg))
1608 .cloneMemRefs(*MI);
1609 (void)MIB;
1610 LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
1611 }
1612 }
1613 MBB.erase(MBBI);
1614
1615 return true;
1616}
1617
1618bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1619 unsigned Opcode = MI.getOpcode();
1620 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1621 "Must have t2STRDi8 or t2LDRDi8");
1622 if (MI.getOperand(3).getImm() != 0)
1623 return false;
1624 LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
1625
1626 // Behaviour for writeback is undefined if base register is the same as one
1627 // of the others.
1628 const MachineOperand &BaseOp = MI.getOperand(2);
1629 Register Base = BaseOp.getReg();
1630 const MachineOperand &Reg0Op = MI.getOperand(0);
1631 const MachineOperand &Reg1Op = MI.getOperand(1);
1632 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1633 return false;
1634
1635 Register PredReg;
1636 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1638 MachineBasicBlock &MBB = *MI.getParent();
1639 int Offset;
1641 PredReg, Offset);
1642 unsigned NewOpc;
1643 if (Offset == 8 || Offset == -8) {
1644 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1645 } else {
1646 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
1647 if (MergeInstr == MBB.end())
1648 return false;
1649 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1650 if (!isLegalAddressImm(NewOpc, Offset, TII))
1651 return false;
1652 }
1653 LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
1654 MBB.erase(MergeInstr);
1655
1656 DebugLoc DL = MI.getDebugLoc();
1657 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
1658 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1659 MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
1660 } else {
1661 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1662 MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
1663 }
1664 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1665 .addImm(Offset).addImm(Pred).addReg(PredReg);
1666 assert(TII->get(Opcode).getNumOperands() == 6 &&
1667 TII->get(NewOpc).getNumOperands() == 7 &&
1668 "Unexpected number of operands in Opcode specification.");
1669
1670 // Transfer implicit operands.
1671 for (const MachineOperand &MO : MI.implicit_operands())
1672 MIB.add(MO);
1673 MIB.cloneMemRefs(MI);
1674
1675 LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
1676 MBB.erase(MBBI);
1677 return true;
1678}
1679
1680/// Returns true if instruction is a memory operation that this pass is capable
1681/// of operating on.
1682static bool isMemoryOp(const MachineInstr &MI) {
1683 unsigned Opcode = MI.getOpcode();
1684 switch (Opcode) {
1685 case ARM::VLDRS:
1686 case ARM::VSTRS:
1687 case ARM::VLDRD:
1688 case ARM::VSTRD:
1689 case ARM::LDRi12:
1690 case ARM::STRi12:
1691 case ARM::tLDRi:
1692 case ARM::tSTRi:
1693 case ARM::tLDRspi:
1694 case ARM::tSTRspi:
1695 case ARM::t2LDRi8:
1696 case ARM::t2LDRi12:
1697 case ARM::t2STRi8:
1698 case ARM::t2STRi12:
1699 break;
1700 default:
1701 return false;
1702 }
1703 if (!MI.getOperand(1).isReg())
1704 return false;
1705
1706 // When no memory operands are present, conservatively assume unaligned,
1707 // volatile, unfoldable.
1708 if (!MI.hasOneMemOperand())
1709 return false;
1710
1711 const MachineMemOperand &MMO = **MI.memoperands_begin();
1712
1713 // Don't touch volatile memory accesses - we may be changing their order.
1714 // TODO: We could allow unordered and monotonic atomics here, but we need to
1715 // make sure the resulting ldm/stm is correctly marked as atomic.
1716 if (MMO.isVolatile() || MMO.isAtomic())
1717 return false;
1718
1719 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1720 // not.
1721 if (MMO.getAlign() < Align(4))
1722 return false;
1723
1724 // str <undef> could probably be eliminated entirely, but for now we just want
1725 // to avoid making a mess of it.
1726 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1727 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1728 return false;
1729
1730 // Likewise don't mess with references to undefined addresses.
1731 if (MI.getOperand(1).isUndef())
1732 return false;
1733
1734 return true;
1735}
1736
1739 bool isDef, unsigned NewOpc, unsigned Reg,
1740 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1741 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1742 unsigned PredReg, const TargetInstrInfo *TII,
1743 MachineInstr *MI) {
1744 if (isDef) {
1745 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1746 TII->get(NewOpc))
1747 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1748 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1749 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1750 // FIXME: This is overly conservative; the new instruction accesses 4
1751 // bytes, not 8.
1752 MIB.cloneMemRefs(*MI);
1753 } else {
1754 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1755 TII->get(NewOpc))
1756 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1757 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1758 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1759 // FIXME: This is overly conservative; the new instruction accesses 4
1760 // bytes, not 8.
1761 MIB.cloneMemRefs(*MI);
1762 }
1763}
1764
1765bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1767 MachineInstr *MI = &*MBBI;
1768 unsigned Opcode = MI->getOpcode();
1769 // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
1770 // if we see this opcode.
1771 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1772 return false;
1773
1774 const MachineOperand &BaseOp = MI->getOperand(2);
1775 Register BaseReg = BaseOp.getReg();
1776 Register EvenReg = MI->getOperand(0).getReg();
1777 Register OddReg = MI->getOperand(1).getReg();
1778 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1779 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1780
1781 // ARM errata 602117: LDRD with base in list may result in incorrect base
1782 // register when interrupted or faulted.
1783 bool Errata602117 = EvenReg == BaseReg &&
1784 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1785 // ARM LDRD/STRD needs consecutive registers.
1786 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1787 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1788
1789 if (!Errata602117 && !NonConsecutiveRegs)
1790 return false;
1791
1792 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1793 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1794 bool EvenDeadKill = isLd ?
1795 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1796 bool EvenUndef = MI->getOperand(0).isUndef();
1797 bool OddDeadKill = isLd ?
1798 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1799 bool OddUndef = MI->getOperand(1).isUndef();
1800 bool BaseKill = BaseOp.isKill();
1801 bool BaseUndef = BaseOp.isUndef();
1802 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1803 "register offset not handled below");
1804 int OffImm = getMemoryOpOffset(*MI);
1805 Register PredReg;
1806 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1807
1808 if (OddRegNum > EvenRegNum && OffImm == 0) {
1809 // Ascending register numbers and no offset. It's safe to change it to a
1810 // ldm or stm.
1811 unsigned NewOpc = (isLd)
1812 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1813 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1814 if (isLd) {
1815 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1816 .add(BaseOp)
1817 .addImm(Pred)
1818 .addReg(PredReg)
1819 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1820 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1821 .cloneMemRefs(*MI);
1822 ++NumLDRD2LDM;
1823 } else {
1824 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1825 .add(BaseOp)
1826 .addImm(Pred)
1827 .addReg(PredReg)
1828 .addReg(EvenReg,
1829 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1830 .addReg(OddReg,
1831 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1832 .cloneMemRefs(*MI);
1833 ++NumSTRD2STM;
1834 }
1835 } else {
1836 // Split into two instructions.
1837 unsigned NewOpc = (isLd)
1838 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1839 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1840 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1841 // so adjust and use t2LDRi12 here for that.
1842 unsigned NewOpc2 = (isLd)
1843 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1844 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1845 // If this is a load, make sure the first load does not clobber the base
1846 // register before the second load reads it.
1847 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1848 assert(!TRI->regsOverlap(OddReg, BaseReg));
1849 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1850 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1851 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1852 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1853 MI);
1854 } else {
1855 if (OddReg == EvenReg && EvenDeadKill) {
1856 // If the two source operands are the same, the kill marker is
1857 // probably on the first one. e.g.
1858 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1859 EvenDeadKill = false;
1860 OddDeadKill = true;
1861 }
1862 // Never kill the base register in the first instruction.
1863 if (EvenReg == BaseReg)
1864 EvenDeadKill = false;
1865 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1866 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1867 MI);
1868 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1869 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1870 MI);
1871 }
1872 if (isLd)
1873 ++NumLDRD2LDR;
1874 else
1875 ++NumSTRD2STR;
1876 }
1877
1878 MBBI = MBB.erase(MBBI);
1879 return true;
1880}
1881
1882/// An optimization pass to turn multiple LDR / STR ops of the same base and
1883/// incrementing offset into LDM / STM ops.
1884bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1885 MemOpQueue MemOps;
1886 unsigned CurrBase = 0;
1887 unsigned CurrOpc = ~0u;
1888 ARMCC::CondCodes CurrPred = ARMCC::AL;
1889 unsigned Position = 0;
1890 assert(Candidates.size() == 0);
1891 assert(MergeBaseCandidates.size() == 0);
1892 LiveRegsValid = false;
1893
1895 I = MBBI) {
1896 // The instruction in front of the iterator is the one we look at.
1897 MBBI = std::prev(I);
1898 if (FixInvalidRegPairOp(MBB, MBBI))
1899 continue;
1900 ++Position;
1901
1902 if (isMemoryOp(*MBBI)) {
1903 unsigned Opcode = MBBI->getOpcode();
1904 const MachineOperand &MO = MBBI->getOperand(0);
1905 Register Reg = MO.getReg();
1907 Register PredReg;
1908 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1910 if (CurrBase == 0) {
1911 // Start of a new chain.
1912 CurrBase = Base;
1913 CurrOpc = Opcode;
1914 CurrPred = Pred;
1915 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1916 continue;
1917 }
1918 // Note: No need to match PredReg in the next if.
1919 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1920 // Watch out for:
1921 // r4 := ldr [r0, #8]
1922 // r4 := ldr [r0, #4]
1923 // or
1924 // r0 := ldr [r0]
1925 // If a load overrides the base register or a register loaded by
1926 // another load in our chain, we cannot take this instruction.
1927 bool Overlap = false;
1928 if (isLoadSingle(Opcode)) {
1929 Overlap = (Base == Reg);
1930 if (!Overlap) {
1931 for (const MemOpQueueEntry &E : MemOps) {
1932 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1933 Overlap = true;
1934 break;
1935 }
1936 }
1937 }
1938 }
1939
1940 if (!Overlap) {
1941 // Check offset and sort memory operation into the current chain.
1942 if (Offset > MemOps.back().Offset) {
1943 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1944 continue;
1945 } else {
1946 MemOpQueue::iterator MI, ME;
1947 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1948 if (Offset < MI->Offset) {
1949 // Found a place to insert.
1950 break;
1951 }
1952 if (Offset == MI->Offset) {
1953 // Collision, abort.
1954 MI = ME;
1955 break;
1956 }
1957 }
1958 if (MI != MemOps.end()) {
1959 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1960 continue;
1961 }
1962 }
1963 }
1964 }
1965
1966 // Don't advance the iterator; The op will start a new chain next.
1967 MBBI = I;
1968 --Position;
1969 // Fallthrough to look into existing chain.
1970 } else if (MBBI->isDebugInstr()) {
1971 continue;
1972 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1973 MBBI->getOpcode() == ARM::t2STRDi8) {
1974 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1975 // remember them because we may still be able to merge add/sub into them.
1976 MergeBaseCandidates.push_back(&*MBBI);
1977 }
1978
1979 // If we are here then the chain is broken; Extract candidates for a merge.
1980 if (MemOps.size() > 0) {
1981 FormCandidates(MemOps);
1982 // Reset for the next chain.
1983 CurrBase = 0;
1984 CurrOpc = ~0u;
1985 CurrPred = ARMCC::AL;
1986 MemOps.clear();
1987 }
1988 }
1989 if (MemOps.size() > 0)
1990 FormCandidates(MemOps);
1991
1992 // Sort candidates so they get processed from end to begin of the basic
1993 // block later; This is necessary for liveness calculation.
1994 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1995 return M0->InsertPos < M1->InsertPos;
1996 };
1997 llvm::sort(Candidates, LessThan);
1998
1999 // Go through list of candidates and merge.
2000 bool Changed = false;
2001 for (const MergeCandidate *Candidate : Candidates) {
2002 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
2003 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
2004 // Merge preceding/trailing base inc/dec into the merged op.
2005 if (Merged) {
2006 Changed = true;
2007 unsigned Opcode = Merged->getOpcode();
2008 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2009 MergeBaseUpdateLSDouble(*Merged);
2010 else
2011 MergeBaseUpdateLSMultiple(Merged);
2012 } else {
2013 for (MachineInstr *MI : Candidate->Instrs) {
2014 if (MergeBaseUpdateLoadStore(MI))
2015 Changed = true;
2016 }
2017 }
2018 } else {
2019 assert(Candidate->Instrs.size() == 1);
2020 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2021 Changed = true;
2022 }
2023 }
2024 Candidates.clear();
2025 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2026 for (MachineInstr *MI : MergeBaseCandidates)
2027 MergeBaseUpdateLSDouble(*MI);
2028 MergeBaseCandidates.clear();
2029
2030 return Changed;
2031}
2032
2033/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2034/// into the preceding stack restore so it directly restore the value of LR
2035/// into pc.
2036/// ldmfd sp!, {..., lr}
2037/// bx lr
2038/// or
2039/// ldmfd sp!, {..., lr}
2040/// mov pc, lr
2041/// =>
2042/// ldmfd sp!, {..., pc}
2043bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2044 // Thumb1 LDM doesn't allow high registers.
2045 if (isThumb1) return false;
2046 if (MBB.empty()) return false;
2047
2049 if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2050 (MBBI->getOpcode() == ARM::BX_RET ||
2051 MBBI->getOpcode() == ARM::tBX_RET ||
2052 MBBI->getOpcode() == ARM::MOVPCLR)) {
2053 MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2054 // Ignore any debug instructions.
2055 while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2056 --PrevI;
2057 MachineInstr &PrevMI = *PrevI;
2058 unsigned Opcode = PrevMI.getOpcode();
2059 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2060 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2061 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2062 MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2063 if (MO.getReg() != ARM::LR)
2064 return false;
2065 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2066 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2067 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
2068 PrevMI.setDesc(TII->get(NewOpc));
2069 MO.setReg(ARM::PC);
2070 PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2071 MBB.erase(MBBI);
2072 return true;
2073 }
2074 }
2075 return false;
2076}
2077
2078bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2080 if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2081 MBBI->getOpcode() != ARM::tBX_RET)
2082 return false;
2083
2085 --Prev;
2086 if (Prev->getOpcode() != ARM::tMOVr ||
2087 !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2088 return false;
2089
2090 for (auto Use : Prev->uses())
2091 if (Use.isKill()) {
2092 assert(STI->hasV4TOps());
2093 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2094 .addReg(Use.getReg(), RegState::Kill)
2097 MBB.erase(MBBI);
2098 MBB.erase(Prev);
2099 return true;
2100 }
2101
2102 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2103}
2104
2105bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2106 MF = &Fn;
2107 STI = &Fn.getSubtarget<ARMSubtarget>();
2108 TL = STI->getTargetLowering();
2109 AFI = Fn.getInfo<ARMFunctionInfo>();
2110 TII = STI->getInstrInfo();
2111 TRI = STI->getRegisterInfo();
2112
2113 RegClassInfoValid = false;
2114 isThumb2 = AFI->isThumb2Function();
2115 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2116
2117 bool Modified = false, ModifiedLDMReturn = false;
2118 for (MachineBasicBlock &MBB : Fn) {
2119 Modified |= LoadStoreMultipleOpti(MBB);
2120 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2121 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2122 if (isThumb1)
2123 Modified |= CombineMovBx(MBB);
2124 }
2125 Modified |= ModifiedLDMReturn;
2126
2127 // If we merged a BX instruction into an LDM, we need to re-calculate whether
2128 // LR is restored. This check needs to consider the whole function, not just
2129 // the instruction(s) we changed, because there may be other BX returns which
2130 // still need LR to be restored.
2131 if (ModifiedLDMReturn)
2133
2134 Allocator.DestroyAll();
2135 return Modified;
2136}
2137
2138bool ARMLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
2139 if (skipFunction(MF.getFunction()))
2140 return false;
2141 ARMLoadStoreOpt Impl;
2142 return Impl.runOnMachineFunction(MF);
2143}
2144
2145#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2146 "ARM pre- register allocation load / store optimization pass"
2147
2148namespace {
2149
2150/// Pre- register allocation pass that move load / stores from consecutive
2151/// locations close to make it more likely they will be combined later.
2152struct ARMPreAllocLoadStoreOpt {
2154 const DataLayout *TD;
2155 const TargetInstrInfo *TII;
2156 const TargetRegisterInfo *TRI;
2157 const ARMSubtarget *STI;
2160 MachineFunction *MF;
2161
2162 bool runOnMachineFunction(MachineFunction &Fn, AliasAnalysis *AA,
2164
2165private:
2166 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2167 unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2168 Register &BaseReg, int &Offset, Register &PredReg,
2169 ARMCC::CondCodes &Pred, bool &isT2);
2170 bool RescheduleOps(
2172 unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2174 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2175 bool DistributeIncrements();
2176 bool DistributeIncrements(Register Base);
2177};
2178
2179struct ARMPreAllocLoadStoreOptLegacy : public MachineFunctionPass {
2180 static char ID;
2181
2182 ARMPreAllocLoadStoreOptLegacy() : MachineFunctionPass(ID) {}
2183
2184 bool runOnMachineFunction(MachineFunction &Fn) override;
2185
2186 StringRef getPassName() const override {
2188 }
2189
2190 void getAnalysisUsage(AnalysisUsage &AU) const override {
2195 }
2196};
2197
2198char ARMPreAllocLoadStoreOptLegacy::ID = 0;
2199
2200} // end anonymous namespace
2201
2202INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2205INITIALIZE_PASS_END(ARMPreAllocLoadStoreOptLegacy, "arm-prera-ldst-opt",
2207
2208// Limit the number of instructions to be rescheduled.
2209// FIXME: tune this limit, and/or come up with some better heuristics.
2210static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2211 cl::init(8), cl::Hidden);
2212
2213bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn,
2214 AliasAnalysis *AAIn,
2215 MachineDominatorTree *DTIn) {
2217 return false;
2218
2219 AA = AAIn;
2220 DT = DTIn;
2221 TD = &Fn.getDataLayout();
2222 STI = &Fn.getSubtarget<ARMSubtarget>();
2223 TII = STI->getInstrInfo();
2224 TRI = STI->getRegisterInfo();
2225 MRI = &Fn.getRegInfo();
2226 MF = &Fn;
2227
2228 bool Modified = DistributeIncrements();
2229 for (MachineBasicBlock &MFI : Fn)
2230 Modified |= RescheduleLoadStoreInstrs(&MFI);
2231
2232 return Modified;
2233}
2234
2235bool ARMPreAllocLoadStoreOptLegacy::runOnMachineFunction(MachineFunction &Fn) {
2236 if (skipFunction(Fn.getFunction()))
2237 return false;
2238
2239 ARMPreAllocLoadStoreOpt Impl;
2240 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2241 MachineDominatorTree *DT =
2242 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2243 return Impl.runOnMachineFunction(Fn, AA, DT);
2244}
2245
2246static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2250 SmallSet<unsigned, 4> &MemRegs,
2251 const TargetRegisterInfo *TRI,
2252 AliasAnalysis *AA) {
2253 // Are there stores / loads / calls between them?
2254 SmallSet<unsigned, 4> AddedRegPressure;
2255 while (++I != E) {
2256 if (I->isDebugInstr() || MemOps.count(&*I))
2257 continue;
2258 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2259 return false;
2260 if (I->mayStore() || (!isLd && I->mayLoad()))
2261 for (MachineInstr *MemOp : MemOps)
2262 if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2263 return false;
2264 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2265 MachineOperand &MO = I->getOperand(j);
2266 if (!MO.isReg())
2267 continue;
2268 Register Reg = MO.getReg();
2269 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2270 return false;
2271 if (Reg != Base && !MemRegs.count(Reg))
2272 AddedRegPressure.insert(Reg);
2273 }
2274 }
2275
2276 // Estimate register pressure increase due to the transformation.
2277 if (MemRegs.size() <= 4)
2278 // Ok if we are moving small number of instructions.
2279 return true;
2280 return AddedRegPressure.size() <= MemRegs.size() * 2;
2281}
2282
2283bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2284 MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2285 Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2286 Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
2287 // Make sure we're allowed to generate LDRD/STRD.
2288 if (!STI->hasV5TEOps())
2289 return false;
2290
2291 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2292 unsigned Scale = 1;
2293 unsigned Opcode = Op0->getOpcode();
2294 if (Opcode == ARM::LDRi12) {
2295 NewOpc = ARM::LDRD;
2296 } else if (Opcode == ARM::STRi12) {
2297 NewOpc = ARM::STRD;
2298 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2299 NewOpc = ARM::t2LDRDi8;
2300 Scale = 4;
2301 isT2 = true;
2302 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2303 NewOpc = ARM::t2STRDi8;
2304 Scale = 4;
2305 isT2 = true;
2306 } else {
2307 return false;
2308 }
2309
2310 // Make sure the base address satisfies i64 ld / st alignment requirement.
2311 // At the moment, we ignore the memoryoperand's value.
2312 // If we want to use AliasAnalysis, we should check it accordingly.
2313 if (!Op0->hasOneMemOperand() ||
2314 (*Op0->memoperands_begin())->isVolatile() ||
2315 (*Op0->memoperands_begin())->isAtomic())
2316 return false;
2317
2318 Align Alignment = (*Op0->memoperands_begin())->getAlign();
2319 Align ReqAlign = STI->getDualLoadStoreAlignment();
2320 if (Alignment < ReqAlign)
2321 return false;
2322
2323 // Then make sure the immediate offset fits.
2324 int OffImm = getMemoryOpOffset(*Op0);
2325 if (isT2) {
2326 int Limit = (1 << 8) * Scale;
2327 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2328 return false;
2329 Offset = OffImm;
2330 } else {
2332 if (OffImm < 0) {
2334 OffImm = - OffImm;
2335 }
2336 int Limit = (1 << 8) * Scale;
2337 if (OffImm >= Limit || (OffImm & (Scale-1)))
2338 return false;
2339 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2340 }
2341 FirstReg = Op0->getOperand(0).getReg();
2342 SecondReg = Op1->getOperand(0).getReg();
2343 if (FirstReg == SecondReg)
2344 return false;
2345 BaseReg = Op0->getOperand(1).getReg();
2346 Pred = getInstrPredicate(*Op0, PredReg);
2347 dl = Op0->getDebugLoc();
2348 return true;
2349}
2350
2351bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2352 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2353 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2354 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2355 bool RetVal = false;
2356
2357 // Sort by offset (in reverse order).
2358 llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2359 int LOffset = getMemoryOpOffset(*LHS);
2360 int ROffset = getMemoryOpOffset(*RHS);
2361 assert(LHS == RHS || LOffset != ROffset);
2362 return LOffset > ROffset;
2363 });
2364
2365 // The loads / stores of the same base are in order. Scan them from first to
2366 // last and check for the following:
2367 // 1. Any def of base.
2368 // 2. Any gaps.
2369 while (Ops.size() > 1) {
2370 unsigned FirstLoc = ~0U;
2371 unsigned LastLoc = 0;
2372 MachineInstr *FirstOp = nullptr;
2373 MachineInstr *LastOp = nullptr;
2374 int LastOffset = 0;
2375 unsigned LastOpcode = 0;
2376 unsigned LastBytes = 0;
2377 unsigned NumMove = 0;
2378 for (MachineInstr *Op : llvm::reverse(Ops)) {
2379 // Make sure each operation has the same kind.
2380 unsigned LSMOpcode
2381 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2382 if (LastOpcode && LSMOpcode != LastOpcode)
2383 break;
2384
2385 // Check that we have a continuous set of offsets.
2386 int Offset = getMemoryOpOffset(*Op);
2387 unsigned Bytes = getLSMultipleTransferSize(Op);
2388 if (LastBytes) {
2389 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2390 break;
2391 }
2392
2393 // Don't try to reschedule too many instructions.
2394 if (NumMove == InstReorderLimit)
2395 break;
2396
2397 // Found a mergeable instruction; save information about it.
2398 ++NumMove;
2399 LastOffset = Offset;
2400 LastBytes = Bytes;
2401 LastOpcode = LSMOpcode;
2402
2403 unsigned Loc = MI2LocMap[Op];
2404 if (Loc <= FirstLoc) {
2405 FirstLoc = Loc;
2406 FirstOp = Op;
2407 }
2408 if (Loc >= LastLoc) {
2409 LastLoc = Loc;
2410 LastOp = Op;
2411 }
2412 }
2413
2414 if (NumMove <= 1)
2415 Ops.pop_back();
2416 else {
2417 SmallPtrSet<MachineInstr*, 4> MemOps;
2418 SmallSet<unsigned, 4> MemRegs;
2419 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2420 MemOps.insert(Ops[i]);
2421 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2422 }
2423
2424 // Be conservative, if the instructions are too far apart, don't
2425 // move them. We want to limit the increase of register pressure.
2426 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2427 if (DoMove)
2428 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2429 MemOps, MemRegs, TRI, AA);
2430 if (!DoMove) {
2431 for (unsigned i = 0; i != NumMove; ++i)
2432 Ops.pop_back();
2433 } else {
2434 // This is the new location for the loads / stores.
2435 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2436 while (InsertPos != MBB->end() &&
2437 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2438 ++InsertPos;
2439
2440 // If we are moving a pair of loads / stores, see if it makes sense
2441 // to try to allocate a pair of registers that can form register pairs.
2442 MachineInstr *Op0 = Ops.back();
2443 MachineInstr *Op1 = Ops[Ops.size()-2];
2444 Register FirstReg, SecondReg;
2445 Register BaseReg, PredReg;
2447 bool isT2 = false;
2448 unsigned NewOpc = 0;
2449 int Offset = 0;
2450 DebugLoc dl;
2451 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2452 FirstReg, SecondReg, BaseReg,
2453 Offset, PredReg, Pred, isT2)) {
2454 Ops.pop_back();
2455 Ops.pop_back();
2456
2457 const MCInstrDesc &MCID = TII->get(NewOpc);
2458 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
2459 MRI->constrainRegClass(FirstReg, TRC);
2460 MRI->constrainRegClass(SecondReg, TRC);
2461
2462 // Form the pair instruction.
2463 if (isLd) {
2464 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2465 .addReg(FirstReg, RegState::Define)
2466 .addReg(SecondReg, RegState::Define)
2467 .addReg(BaseReg);
2468 // FIXME: We're converting from LDRi12 to an insn that still
2469 // uses addrmode2, so we need an explicit offset reg. It should
2470 // always by reg0 since we're transforming LDRi12s.
2471 if (!isT2)
2472 MIB.addReg(0);
2473 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2474 MIB.cloneMergedMemRefs({Op0, Op1});
2475 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2476 ++NumLDRDFormed;
2477 } else {
2478 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2479 .addReg(FirstReg)
2480 .addReg(SecondReg)
2481 .addReg(BaseReg);
2482 // FIXME: We're converting from LDRi12 to an insn that still
2483 // uses addrmode2, so we need an explicit offset reg. It should
2484 // always by reg0 since we're transforming STRi12s.
2485 if (!isT2)
2486 MIB.addReg(0);
2487 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2488 MIB.cloneMergedMemRefs({Op0, Op1});
2489 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2490 ++NumSTRDFormed;
2491 }
2492 MBB->erase(Op0);
2493 MBB->erase(Op1);
2494
2495 if (!isT2) {
2496 // Add register allocation hints to form register pairs.
2497 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2498 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2499 }
2500 } else {
2501 for (unsigned i = 0; i != NumMove; ++i) {
2502 MachineInstr *Op = Ops.pop_back_val();
2503 if (isLd) {
2504 // Populate RegisterMap with all Registers defined by loads.
2505 Register Reg = Op->getOperand(0).getReg();
2506 RegisterMap[Reg];
2507 }
2508
2509 MBB->splice(InsertPos, MBB, Op);
2510 }
2511 }
2512
2513 NumLdStMoved += NumMove;
2514 RetVal = true;
2515 }
2516 }
2517 }
2518
2519 return RetVal;
2520}
2521
2523 std::function<void(MachineOperand &)> Fn) {
2524 if (MI->isNonListDebugValue()) {
2525 auto &Op = MI->getOperand(0);
2526 if (Op.isReg())
2527 Fn(Op);
2528 } else {
2529 for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2530 auto &Op = MI->getOperand(I);
2531 if (Op.isReg())
2532 Fn(Op);
2533 }
2534 }
2535}
2536
2537// Update the RegisterMap with the instruction that was moved because a
2538// DBG_VALUE_LIST may need to be moved again.
2541 MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2542
2543 forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2544 auto RegIt = RegisterMap.find(Op.getReg());
2545 if (RegIt == RegisterMap.end())
2546 return;
2547 auto &InstrVec = RegIt->getSecond();
2548 llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2549 });
2550}
2551
2553 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2554 MI->getDebugLoc()->getInlinedAt());
2555 return DbgVar;
2556}
2557
2558bool
2559ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2560 bool RetVal = false;
2561
2562 DenseMap<MachineInstr *, unsigned> MI2LocMap;
2563 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2564 using BaseVec = SmallVector<unsigned, 4>;
2565 Base2InstMap Base2LdsMap;
2566 Base2InstMap Base2StsMap;
2567 BaseVec LdBases;
2568 BaseVec StBases;
2569 // This map is used to track the relationship between the virtual
2570 // register that is the result of a load that is moved and the DBG_VALUE
2571 // MachineInstr pointer that uses that virtual register.
2572 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2573
2574 unsigned Loc = 0;
2577 while (MBBI != E) {
2578 for (; MBBI != E; ++MBBI) {
2579 MachineInstr &MI = *MBBI;
2580 if (MI.isCall() || MI.isTerminator()) {
2581 // Stop at barriers.
2582 ++MBBI;
2583 break;
2584 }
2585
2586 if (!MI.isDebugInstr())
2587 MI2LocMap[&MI] = ++Loc;
2588
2589 if (!isMemoryOp(MI))
2590 continue;
2591 Register PredReg;
2592 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2593 continue;
2594
2595 int Opc = MI.getOpcode();
2596 bool isLd = isLoadSingle(Opc);
2597 Register Base = MI.getOperand(1).getReg();
2599 bool StopHere = false;
2600 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2601 auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2602 if (Inserted) {
2603 BI->second.push_back(&MI);
2604 Bases.push_back(Base);
2605 return;
2606 }
2607 for (const MachineInstr *MI : BI->second) {
2608 if (Offset == getMemoryOpOffset(*MI)) {
2609 StopHere = true;
2610 break;
2611 }
2612 }
2613 if (!StopHere)
2614 BI->second.push_back(&MI);
2615 };
2616
2617 if (isLd)
2618 FindBases(Base2LdsMap, LdBases);
2619 else
2620 FindBases(Base2StsMap, StBases);
2621
2622 if (StopHere) {
2623 // Found a duplicate (a base+offset combination that's seen earlier).
2624 // Backtrack.
2625 --Loc;
2626 break;
2627 }
2628 }
2629
2630 // Re-schedule loads.
2631 for (unsigned Base : LdBases) {
2632 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2633 if (Lds.size() > 1)
2634 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2635 }
2636
2637 // Re-schedule stores.
2638 for (unsigned Base : StBases) {
2639 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2640 if (Sts.size() > 1)
2641 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2642 }
2643
2644 if (MBBI != E) {
2645 Base2LdsMap.clear();
2646 Base2StsMap.clear();
2647 LdBases.clear();
2648 StBases.clear();
2649 }
2650 }
2651
2652 // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2653 // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2654 // use-before-def, resulting in a loss of debug info.
2655
2656 // Example:
2657 // Before the Pre Register Allocation Load Store Pass
2658 // inst_a
2659 // %2 = ld ...
2660 // inst_b
2661 // DBG_VALUE %2, "x", ...
2662 // %3 = ld ...
2663
2664 // After the Pass:
2665 // inst_a
2666 // inst_b
2667 // DBG_VALUE %2, "x", ...
2668 // %2 = ld ...
2669 // %3 = ld ...
2670
2671 // The code below addresses this by moving the DBG_VALUE to the position
2672 // immediately after the load.
2673
2674 // Example:
2675 // After the code below:
2676 // inst_a
2677 // inst_b
2678 // %2 = ld ...
2679 // DBG_VALUE %2, "x", ...
2680 // %3 = ld ...
2681
2682 // The algorithm works in two phases: First RescheduleOps() populates the
2683 // RegisterMap with registers that were moved as keys, there is no value
2684 // inserted. In the next phase, every MachineInstr in a basic block is
2685 // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2686 // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2687 // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2688 // describes debug information for a variable that already exists in the
2689 // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2690 // be set to undef. If the current MachineInstr is a load that was moved,
2691 // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2692 // the load.
2693
2694 // To illustrate the above algorithm visually let's take this example.
2695
2696 // Before the Pre Register Allocation Load Store Pass:
2697 // %2 = ld ...
2698 // DBG_VALUE %2, A, .... # X
2699 // DBG_VALUE 0, A, ... # Y
2700 // %3 = ld ...
2701 // DBG_VALUE %3, A, ..., # Z
2702 // %4 = ld ...
2703
2704 // After Pre Register Allocation Load Store Pass:
2705 // DBG_VALUE %2, A, .... # X
2706 // DBG_VALUE 0, A, ... # Y
2707 // DBG_VALUE %3, A, ..., # Z
2708 // %2 = ld ...
2709 // %3 = ld ...
2710 // %4 = ld ...
2711
2712 // The algorithm below does the following:
2713
2714 // In the beginning, the RegisterMap will have been populated with the virtual
2715 // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2716 // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2717 // InstrMap {}
2718 // -> DBG_VALUE %2, A, .... # X
2719 // DBG_VALUE 0, A, ... # Y
2720 // DBG_VALUE %3, A, ..., # Z
2721 // %2 = ld ...
2722 // %3 = ld ...
2723 // %4 = ld ...
2724
2725 // After the first DBG_VALUE (denoted with an X) is processed, the
2726 // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2727 // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2728 // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2729 // DBG_VALUE %2, A, .... # X
2730 // -> DBG_VALUE 0, A, ... # Y
2731 // DBG_VALUE %3, A, ..., # Z
2732 // %2 = ld ...
2733 // %3 = ld ...
2734 // %4 = ld ...
2735
2736 // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2737 // to now hold Y for A and the RegisterMap is also updated to remove X from
2738 // %2, this is because both X and Y describe the same debug variable A. X is
2739 // also updated to have a $noreg as the first operand.
2740 // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2741 // InstrMap = {X-> 2}
2742 // DBG_VALUE $noreg, A, .... # X
2743 // DBG_VALUE 0, A, ... # Y
2744 // -> DBG_VALUE %3, A, ..., # Z
2745 // %2 = ld ...
2746 // %3 = ld ...
2747 // %4 = ld ...
2748
2749 // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2750 // hold Z fr A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2751 // is updated to have Z mapped to %3. This is again because Z describes the
2752 // debug variable A, Y is not updated to have $noreg as first operand because
2753 // its first operand is an immediate, not a register.
2754 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2755 // InstrMap = {X -> 2, Z -> 3}
2756 // DBG_VALUE $noreg, A, .... # X
2757 // DBG_VALUE 0, A, ... # Y
2758 // DBG_VALUE %3, A, ..., # Z
2759 // -> %2 = ld ...
2760 // %3 = ld ...
2761 // %4 = ld ...
2762
2763 // Nothing happens here since the RegisterMap for %2 contains no value.
2764 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2765 // InstrMap = {X -> 2, Z -> 3}
2766 // DBG_VALUE $noreg, A, .... # X
2767 // DBG_VALUE 0, A, ... # Y
2768 // DBG_VALUE %3, A, ..., # Z
2769 // %2 = ld ...
2770 // -> %3 = ld ...
2771 // %4 = ld ...
2772
2773 // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2774 // pointer Z is copied to come after the load for %3 and the old Z's first
2775 // operand is changed to $noreg the Basic Block iterator is moved to after the
2776 // DBG_VALUE Z's new position.
2777 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2778 // InstrMap = {X -> 2, Z -> 3}
2779 // DBG_VALUE $noreg, A, .... # X
2780 // DBG_VALUE 0, A, ... # Y
2781 // DBG_VALUE $noreg, A, ..., # Old Z
2782 // %2 = ld ...
2783 // %3 = ld ...
2784 // DBG_VALUE %3, A, ..., # Z
2785 // -> %4 = ld ...
2786
2787 // Nothing happens for %4 and the algorithm exits having processed the entire
2788 // Basic Block.
2789 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2790 // InstrMap = {X -> 2, Z -> 3}
2791 // DBG_VALUE $noreg, A, .... # X
2792 // DBG_VALUE 0, A, ... # Y
2793 // DBG_VALUE $noreg, A, ..., # Old Z
2794 // %2 = ld ...
2795 // %3 = ld ...
2796 // DBG_VALUE %3, A, ..., # Z
2797 // %4 = ld ...
2798
2799 // This map is used to track the relationship between
2800 // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2801 // debug information for that Debug Variable.
2802 SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
2803 // This map is used to track the relationship between a DBG_VALUE or
2804 // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2805 SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
2806 for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2807 MachineInstr &MI = *MBBI;
2808
2809 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2810 auto RegIt = RegisterMap.find(Reg);
2811 if (RegIt == RegisterMap.end())
2812 return;
2813 auto &InstrVec = RegIt->getSecond();
2814 InstrVec.push_back(&MI);
2815 InstrMap[&MI].push_back(Reg);
2816 };
2817
2818 if (MI.isDebugValue()) {
2819 assert(MI.getDebugVariable() &&
2820 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2821
2823 // If the first operand is a register and it exists in the RegisterMap, we
2824 // know this is a DBG_VALUE that uses the result of a load that was moved,
2825 // and is therefore a candidate to also be moved, add it to the
2826 // RegisterMap and InstrMap.
2827 forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2828 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2829 });
2830
2831 // If the current DBG_VALUE describes the same variable as one of the
2832 // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2833 // undef. Moving one DBG_VALUE past another would result in the variable's
2834 // value going back in time when stepping through the block in the
2835 // debugger.
2836 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2837 if (InstrIt != DbgValueSinkCandidates.end()) {
2838 auto *Instr = InstrIt->getSecond();
2839 auto RegIt = InstrMap.find(Instr);
2840 if (RegIt != InstrMap.end()) {
2841 const auto &RegVec = RegIt->getSecond();
2842 // For every Register in the RegVec, remove the MachineInstr in the
2843 // RegisterMap that describes the DbgVar.
2844 for (auto &Reg : RegVec) {
2845 auto RegIt = RegisterMap.find(Reg);
2846 if (RegIt == RegisterMap.end())
2847 continue;
2848 auto &InstrVec = RegIt->getSecond();
2849 auto IsDbgVar = [&](MachineInstr *I) -> bool {
2851 return Var == DbgVar;
2852 };
2853
2854 llvm::erase_if(InstrVec, IsDbgVar);
2855 }
2857 [&](MachineOperand &Op) { Op.setReg(0); });
2858 }
2859 }
2860 DbgValueSinkCandidates[DbgVar] = &MI;
2861 } else {
2862 // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2863 // then move that DBG_VALUE to below the load.
2864 auto Opc = MI.getOpcode();
2865 if (!isLoadSingle(Opc))
2866 continue;
2867 auto Reg = MI.getOperand(0).getReg();
2868 auto RegIt = RegisterMap.find(Reg);
2869 if (RegIt == RegisterMap.end())
2870 continue;
2871 auto &DbgInstrVec = RegIt->getSecond();
2872 if (!DbgInstrVec.size())
2873 continue;
2874 for (auto *DbgInstr : DbgInstrVec) {
2875 MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2876 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2877 MBB->insert(InsertPos, ClonedMI);
2878 MBBI++;
2879 // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2880 // that was moved.
2881 auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2882 // Erase DbgVar from DbgValueSinkCandidates if still present. If the
2883 // instruction is a DBG_VALUE_LIST, it may have already been erased from
2884 // DbgValueSinkCandidates.
2885 DbgValueSinkCandidates.erase(DbgVar);
2886 // Zero out original dbg instr
2887 forEachDbgRegOperand(DbgInstr,
2888 [&](MachineOperand &Op) { Op.setReg(0); });
2889 // Update RegisterMap with ClonedMI because it might have to be moved
2890 // again.
2891 if (DbgInstr->isDebugValueList())
2892 updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2893 DbgInstr);
2894 }
2895 }
2896 }
2897 return RetVal;
2898}
2899
2900// Get the Base register operand index from the memory access MachineInst if we
2901// should attempt to distribute postinc on it. Return -1 if not of a valid
2902// instruction type. If it returns an index, it is assumed that instruction is a
2903// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
2905 switch (MI.getOpcode()) {
2906 case ARM::MVE_VLDRBS16:
2907 case ARM::MVE_VLDRBS32:
2908 case ARM::MVE_VLDRBU16:
2909 case ARM::MVE_VLDRBU32:
2910 case ARM::MVE_VLDRHS32:
2911 case ARM::MVE_VLDRHU32:
2912 case ARM::MVE_VLDRBU8:
2913 case ARM::MVE_VLDRHU16:
2914 case ARM::MVE_VLDRWU32:
2915 case ARM::MVE_VSTRB16:
2916 case ARM::MVE_VSTRB32:
2917 case ARM::MVE_VSTRH32:
2918 case ARM::MVE_VSTRBU8:
2919 case ARM::MVE_VSTRHU16:
2920 case ARM::MVE_VSTRWU32:
2921 case ARM::t2LDRHi8:
2922 case ARM::t2LDRHi12:
2923 case ARM::t2LDRSHi8:
2924 case ARM::t2LDRSHi12:
2925 case ARM::t2LDRBi8:
2926 case ARM::t2LDRBi12:
2927 case ARM::t2LDRSBi8:
2928 case ARM::t2LDRSBi12:
2929 case ARM::t2STRBi8:
2930 case ARM::t2STRBi12:
2931 case ARM::t2STRHi8:
2932 case ARM::t2STRHi12:
2933 return 1;
2934 case ARM::MVE_VLDRBS16_post:
2935 case ARM::MVE_VLDRBS32_post:
2936 case ARM::MVE_VLDRBU16_post:
2937 case ARM::MVE_VLDRBU32_post:
2938 case ARM::MVE_VLDRHS32_post:
2939 case ARM::MVE_VLDRHU32_post:
2940 case ARM::MVE_VLDRBU8_post:
2941 case ARM::MVE_VLDRHU16_post:
2942 case ARM::MVE_VLDRWU32_post:
2943 case ARM::MVE_VSTRB16_post:
2944 case ARM::MVE_VSTRB32_post:
2945 case ARM::MVE_VSTRH32_post:
2946 case ARM::MVE_VSTRBU8_post:
2947 case ARM::MVE_VSTRHU16_post:
2948 case ARM::MVE_VSTRWU32_post:
2949 case ARM::MVE_VLDRBS16_pre:
2950 case ARM::MVE_VLDRBS32_pre:
2951 case ARM::MVE_VLDRBU16_pre:
2952 case ARM::MVE_VLDRBU32_pre:
2953 case ARM::MVE_VLDRHS32_pre:
2954 case ARM::MVE_VLDRHU32_pre:
2955 case ARM::MVE_VLDRBU8_pre:
2956 case ARM::MVE_VLDRHU16_pre:
2957 case ARM::MVE_VLDRWU32_pre:
2958 case ARM::MVE_VSTRB16_pre:
2959 case ARM::MVE_VSTRB32_pre:
2960 case ARM::MVE_VSTRH32_pre:
2961 case ARM::MVE_VSTRBU8_pre:
2962 case ARM::MVE_VSTRHU16_pre:
2963 case ARM::MVE_VSTRWU32_pre:
2964 return 2;
2965 }
2966 return -1;
2967}
2968
2970 switch (MI.getOpcode()) {
2971 case ARM::MVE_VLDRBS16_post:
2972 case ARM::MVE_VLDRBS32_post:
2973 case ARM::MVE_VLDRBU16_post:
2974 case ARM::MVE_VLDRBU32_post:
2975 case ARM::MVE_VLDRHS32_post:
2976 case ARM::MVE_VLDRHU32_post:
2977 case ARM::MVE_VLDRBU8_post:
2978 case ARM::MVE_VLDRHU16_post:
2979 case ARM::MVE_VLDRWU32_post:
2980 case ARM::MVE_VSTRB16_post:
2981 case ARM::MVE_VSTRB32_post:
2982 case ARM::MVE_VSTRH32_post:
2983 case ARM::MVE_VSTRBU8_post:
2984 case ARM::MVE_VSTRHU16_post:
2985 case ARM::MVE_VSTRWU32_post:
2986 return true;
2987 }
2988 return false;
2989}
2990
2992 switch (MI.getOpcode()) {
2993 case ARM::MVE_VLDRBS16_pre:
2994 case ARM::MVE_VLDRBS32_pre:
2995 case ARM::MVE_VLDRBU16_pre:
2996 case ARM::MVE_VLDRBU32_pre:
2997 case ARM::MVE_VLDRHS32_pre:
2998 case ARM::MVE_VLDRHU32_pre:
2999 case ARM::MVE_VLDRBU8_pre:
3000 case ARM::MVE_VLDRHU16_pre:
3001 case ARM::MVE_VLDRWU32_pre:
3002 case ARM::MVE_VSTRB16_pre:
3003 case ARM::MVE_VSTRB32_pre:
3004 case ARM::MVE_VSTRH32_pre:
3005 case ARM::MVE_VSTRBU8_pre:
3006 case ARM::MVE_VSTRHU16_pre:
3007 case ARM::MVE_VSTRWU32_pre:
3008 return true;
3009 }
3010 return false;
3011}
3012
3013// Given a memory access Opcode, check that the give Imm would be a valid Offset
3014// for this instruction (same as isLegalAddressImm), Or if the instruction
3015// could be easily converted to one where that was valid. For example converting
3016// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
3017// AdjustBaseAndOffset below.
3018static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm,
3019 const TargetInstrInfo *TII,
3020 int &CodesizeEstimate) {
3021 if (isLegalAddressImm(Opcode, Imm, TII))
3022 return true;
3023
3024 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
3025 const MCInstrDesc &Desc = TII->get(Opcode);
3026 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
3027 switch (AddrMode) {
3029 CodesizeEstimate += 1;
3030 return Imm < 0 && -Imm < ((1 << 8) * 1);
3031 }
3032 return false;
3033}
3034
3035// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
3036// by -Offset. This can either happen in-place or be a replacement as MI is
3037// converted to another instruction type.
3039 int Offset, const TargetInstrInfo *TII,
3040 const TargetRegisterInfo *TRI) {
3041 // Set the Base reg
3042 unsigned BaseOp = getBaseOperandIndex(*MI);
3043 MI->getOperand(BaseOp).setReg(NewBaseReg);
3044 // and constrain the reg class to that required by the instruction.
3045 MachineFunction *MF = MI->getMF();
3046 MachineRegisterInfo &MRI = MF->getRegInfo();
3047 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3048 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
3049 MRI.constrainRegClass(NewBaseReg, TRC);
3050
3051 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3052 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3053 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3054 else {
3055 unsigned ConvOpcode;
3056 switch (MI->getOpcode()) {
3057 case ARM::t2LDRHi12:
3058 ConvOpcode = ARM::t2LDRHi8;
3059 break;
3060 case ARM::t2LDRSHi12:
3061 ConvOpcode = ARM::t2LDRSHi8;
3062 break;
3063 case ARM::t2LDRBi12:
3064 ConvOpcode = ARM::t2LDRBi8;
3065 break;
3066 case ARM::t2LDRSBi12:
3067 ConvOpcode = ARM::t2LDRSBi8;
3068 break;
3069 case ARM::t2STRHi12:
3070 ConvOpcode = ARM::t2STRHi8;
3071 break;
3072 case ARM::t2STRBi12:
3073 ConvOpcode = ARM::t2STRBi8;
3074 break;
3075 default:
3076 llvm_unreachable("Unhandled convertible opcode");
3077 }
3078 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3079 "Illegal Address Immediate after convert!");
3080
3081 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3082 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3083 .add(MI->getOperand(0))
3084 .add(MI->getOperand(1))
3085 .addImm(OldOffset - Offset)
3086 .add(MI->getOperand(3))
3087 .add(MI->getOperand(4))
3088 .cloneMemRefs(*MI);
3089 MI->eraseFromParent();
3090 }
3091}
3092
3094 Register NewReg,
3095 const TargetInstrInfo *TII,
3096 const TargetRegisterInfo *TRI) {
3097 MachineFunction *MF = MI->getMF();
3098 MachineRegisterInfo &MRI = MF->getRegInfo();
3099
3100 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3101 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3102
3103 const MCInstrDesc &MCID = TII->get(NewOpcode);
3104 // Constrain the def register class
3105 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
3106 MRI.constrainRegClass(NewReg, TRC);
3107 // And do the same for the base operand
3108 TRC = TII->getRegClass(MCID, 2);
3109 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3110
3111 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3112 switch (AddrMode) {
3116 // Any MVE load/store
3117 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3118 .addReg(NewReg, RegState::Define)
3119 .add(MI->getOperand(0))
3120 .add(MI->getOperand(1))
3121 .addImm(Offset)
3122 .add(MI->getOperand(3))
3123 .add(MI->getOperand(4))
3124 .add(MI->getOperand(5))
3125 .cloneMemRefs(*MI);
3127 if (MI->mayLoad()) {
3128 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3129 .add(MI->getOperand(0))
3130 .addReg(NewReg, RegState::Define)
3131 .add(MI->getOperand(1))
3132 .addImm(Offset)
3133 .add(MI->getOperand(3))
3134 .add(MI->getOperand(4))
3135 .cloneMemRefs(*MI);
3136 } else {
3137 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3138 .addReg(NewReg, RegState::Define)
3139 .add(MI->getOperand(0))
3140 .add(MI->getOperand(1))
3141 .addImm(Offset)
3142 .add(MI->getOperand(3))
3143 .add(MI->getOperand(4))
3144 .cloneMemRefs(*MI);
3145 }
3146 default:
3147 llvm_unreachable("Unhandled createPostIncLoadStore");
3148 }
3149}
3150
3151// Given a Base Register, optimise the load/store uses to attempt to create more
3152// post-inc accesses and less register moves. We do this by taking zero offset
3153// loads/stores with an add, and convert them to a postinc load/store of the
3154// same type. Any subsequent accesses will be adjusted to use and account for
3155// the post-inc value.
3156// For example:
3157// LDR #0 LDR_POSTINC #16
3158// LDR #4 LDR #-12
3159// LDR #8 LDR #-8
3160// LDR #12 LDR #-4
3161// ADD #16
3162//
3163// At the same time if we do not find an increment but do find an existing
3164// pre/post inc instruction, we can still adjust the offsets of subsequent
3165// instructions to save the register move that would otherwise be needed for the
3166// in-place increment.
3167bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3168 // We are looking for:
3169 // One zero offset load/store that can become postinc
3170 MachineInstr *BaseAccess = nullptr;
3171 MachineInstr *PrePostInc = nullptr;
3172 // An increment that can be folded in
3173 MachineInstr *Increment = nullptr;
3174 // Other accesses after BaseAccess that will need to be updated to use the
3175 // postinc value.
3176 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3177 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3178 if (!Increment && getAddSubImmediate(Use) != 0) {
3179 Increment = &Use;
3180 continue;
3181 }
3182
3183 int BaseOp = getBaseOperandIndex(Use);
3184 if (BaseOp == -1)
3185 return false;
3186
3187 if (!Use.getOperand(BaseOp).isReg() ||
3188 Use.getOperand(BaseOp).getReg() != Base)
3189 return false;
3190 if (isPreIndex(Use) || isPostIndex(Use))
3191 PrePostInc = &Use;
3192 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3193 BaseAccess = &Use;
3194 else
3195 OtherAccesses.insert(&Use);
3196 }
3197
3198 int IncrementOffset;
3199 Register NewBaseReg;
3200 if (BaseAccess && Increment) {
3201 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3202 return false;
3203 Register PredReg;
3204 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3206 return false;
3207
3208 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3209 << Base.virtRegIndex() << "\n");
3210
3211 // Make sure that Increment has no uses before BaseAccess that are not PHI
3212 // uses.
3213 for (MachineInstr &Use :
3214 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3215 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3216 !DT->dominates(BaseAccess, &Use))) {
3217 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3218 return false;
3219 }
3220 }
3221
3222 // Make sure that Increment can be folded into Base
3223 IncrementOffset = getAddSubImmediate(*Increment);
3224 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3225 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3226 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3227 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3228 return false;
3229 }
3230 }
3231 else if (PrePostInc) {
3232 // If we already have a pre/post index load/store then set BaseAccess,
3233 // IncrementOffset and NewBaseReg to the values it already produces,
3234 // allowing us to update and subsequent uses of BaseOp reg with the
3235 // incremented value.
3236 if (Increment)
3237 return false;
3238
3239 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3240 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3241 int BaseOp = getBaseOperandIndex(*PrePostInc);
3242 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3243 BaseAccess = PrePostInc;
3244 NewBaseReg = PrePostInc->getOperand(0).getReg();
3245 }
3246 else
3247 return false;
3248
3249 // And make sure that the negative value of increment can be added to all
3250 // other offsets after the BaseAccess. We rely on either
3251 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3252 // to keep things simple.
3253 // This also adds a simple codesize metric, to detect if an instruction (like
3254 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3255 // cannot because it is converted to something else (t2LDRBi8). We start this
3256 // at -1 for the gain from removing the increment.
3257 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3258 int CodesizeEstimate = -1;
3259 for (auto *Use : OtherAccesses) {
3260 if (DT->dominates(BaseAccess, Use)) {
3261 SuccessorAccesses.insert(Use);
3262 unsigned BaseOp = getBaseOperandIndex(*Use);
3263 if (!isLegalOrConvertibleAddressImm(Use->getOpcode(),
3264 Use->getOperand(BaseOp + 1).getImm() -
3265 IncrementOffset,
3266 TII, CodesizeEstimate)) {
3267 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3268 return false;
3269 }
3270 } else if (!DT->dominates(Use, BaseAccess)) {
3271 LLVM_DEBUG(
3272 dbgs() << " Unknown dominance relation between Base and Use\n");
3273 return false;
3274 }
3275 }
3276 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3277 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3278 return false;
3279 }
3280
3281 if (!PrePostInc) {
3282 // Replace BaseAccess with a post inc
3283 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3284 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3285 NewBaseReg = Increment->getOperand(0).getReg();
3286 MachineInstr *BaseAccessPost =
3287 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3288 BaseAccess->eraseFromParent();
3289 Increment->eraseFromParent();
3290 (void)BaseAccessPost;
3291 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3292 }
3293
3294 for (auto *Use : SuccessorAccesses) {
3295 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3296 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3297 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3298 }
3299
3300 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3301 // remain.
3302 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3303 Op.setIsKill(false);
3304 return true;
3305}
3306
3307bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3308 bool Changed = false;
3309 SmallSetVector<Register, 4> Visited;
3310 for (auto &MBB : *MF) {
3311 for (auto &MI : MBB) {
3312 int BaseOp = getBaseOperandIndex(MI);
3313 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3314 continue;
3315
3316 Register Base = MI.getOperand(BaseOp).getReg();
3317 if (!Base.isVirtual())
3318 continue;
3319
3320 Visited.insert(Base);
3321 }
3322 }
3323
3324 for (auto Base : Visited)
3325 Changed |= DistributeIncrements(Base);
3326
3327 return Changed;
3328}
3329
3330/// Returns an instance of the load / store optimization pass.
3332 if (PreAlloc)
3333 return new ARMPreAllocLoadStoreOptLegacy();
3334 return new ARMLoadStoreOptLegacy();
3335}
3336
3340 ARMLoadStoreOpt Impl;
3341 bool Changed = Impl.runOnMachineFunction(MF);
3342 if (!Changed)
3343 return PreservedAnalyses::all();
3346 return PA;
3347}
3348
3352 ARMPreAllocLoadStoreOpt Impl;
3353 AliasAnalysis *AA =
3355 .getManager()
3356 .getResult<AAManager>(MF.getFunction());
3358 bool Changed = Impl.runOnMachineFunction(MF, AA, DT);
3359 if (!Changed)
3360 return PreservedAnalyses::all();
3363 return PA;
3364}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static bool isLoadSingle(unsigned Opc)
static int getMemoryOpOffset(const MachineInstr &MI)
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, SmallPtrSetImpl< MachineInstr * > &MemOps, SmallSet< unsigned, 4 > &MemRegs, const TargetRegisterInfo *TRI, AliasAnalysis *AA)
static bool ContainsReg(ArrayRef< std::pair< unsigned, bool > > Regs, unsigned Reg)
static bool isPreIndex(MachineInstr &MI)
static void forEachDbgRegOperand(MachineInstr *MI, std::function< void(MachineOperand &)> Fn)
static bool isPostIndex(MachineInstr &MI)
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode)
static unsigned getLSMultipleTransferSize(const MachineInstr *MI)
static bool isLegalOrConvertibleAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII, int &CodesizeEstimate)
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode)
static bool isT1i32Load(unsigned Opc)
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, int Offset, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode)
static MachineInstr * createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isi32Store(unsigned Opc)
static MachineBasicBlock::iterator findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset, const TargetRegisterInfo *TRI)
Searches for a increment or decrement of Reg after MBBI.
static MachineBasicBlock::iterator findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg, ARMCC::CondCodes Pred, Register PredReg, int &Offset)
Searches for an increment or decrement of Reg before MBBI.
static const MachineOperand & getLoadStoreBaseOp(const MachineInstr &MI)
static void updateRegisterMapForDbgValueListAfterMove(SmallDenseMap< Register, SmallVector< MachineInstr * >, 8 > &RegisterMap, MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace)
arm prera ldst static false cl::opt< unsigned > InstReorderLimit("arm-prera-ldst-opt-reorder-limit", cl::init(8), cl::Hidden)
static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, MachineInstr *MI)
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg, ARMCC::CondCodes Pred, Register PredReg)
Check if the given instruction increments or decrements a register and return the amount it is increm...
static bool isT2i32Store(unsigned Opc)
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI, const MachineInstr &MI)
Return true for loads/stores that can be combined to a double/multi operation without increasing the ...
static int getBaseOperandIndex(MachineInstr &MI)
static bool isT2i32Load(unsigned Opc)
static bool isi32Load(unsigned Opc)
static unsigned getImmScale(unsigned Opc)
static bool isT1i32Store(unsigned Opc)
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME
#define ARM_LOAD_STORE_OPT_NAME
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode)
static bool isMemoryOp(const MachineInstr &MI)
Returns true if instruction is a memory operation that this pass is capable of operating on.
static const MachineOperand & getLoadStoreRegOp(const MachineInstr &MI)
static bool isValidLSDoubleOffset(int Offset)
static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI)
static cl::opt< bool > AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, cl::init(false), cl::desc("Be more conservative in ARM load/store opt"))
This switch disables formation of double/multi instructions that could potentially lead to (new) alig...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasMinSize() const
bool isCortexM3() const
Align getDualLoadStoreAlignment() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:328
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
Describe properties that are true of each instruction in the target description file.
MachineInstrBundleIterator< const MachineInstr > const_iterator
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
LLVM Value Representation.
Definition Value.h:75
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ARM
Windows AXP64.
Definition MCAsmInfo.h:49
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
This namespace contains all of the command line option processing machinery.
Definition CommandLine.h:52
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BBIterator iterator
Definition BasicBlock.h:87
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1660
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Op::Description Desc
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr RegState getDefRegState(bool B)
FunctionPass * createARMLoadStoreOptLegacyPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1909
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
int getAddSubImmediate(MachineInstr &MI)
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
constexpr RegState getUndefRegState(bool B)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39