LLVM 4.0.0
AArch64LoadStoreOptimizer.cpp
1 //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64InstrInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetInstrInfo.h"
30 #include "llvm/Target/TargetMachine.h"
31 #include "llvm/Target/TargetRegisterInfo.h"
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "aarch64-ldst-opt"
35 
36 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
37 STATISTIC(NumPostFolded, "Number of post-index updates folded");
38 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
39 STATISTIC(NumUnscaledPairCreated,
40  "Number of load/store pairs generated from unscaled instructions");
41 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
42 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
43 
44 // The LdStLimit limits how far we search for load/store pairs.
45 static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
46  cl::init(20), cl::Hidden);
47 
48 // The UpdateLimit limits how far we search for update instructions when we form
49 // pre-/post-index instructions.
50 static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
51  cl::Hidden);
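// Note (illustrative, not part of the original source): both limits are
// ordinary cl::opt flags (hidden from -help), so for experiments they can
// typically be overridden on the llc command line, for example:
//   llc -mtriple=aarch64 -aarch64-load-store-scan-limit=40 \
//       -aarch64-update-scan-limit=200 input.ll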
52 
53 #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
54 
55 namespace {
56 
57 typedef struct LdStPairFlags {
58  // If a matching instruction is found, MergeForward is set to true if the
59  // merge is to remove the first instruction and replace the second with
60  // a pair-wise insn, and false if the reverse is true.
61  bool MergeForward;
62 
63  // SExtIdx gives the index of the result of the load pair that must be
64  // extended. The value of SExtIdx assumes that the paired load produces the
65  // value in this order: (I, returned iterator), i.e., -1 means no value has
66  // to be extended, 0 means I, and 1 means the returned iterator.
67  int SExtIdx;
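 // Illustration: pairing "ldr w0, [x2]" with "ldrsw x1, [x2, #4]" produces
 // "ldp w0, w1, [x2]", and the w1 result must then be sign-extended into x1
 // (the pass emits an SBFMXri for that); SExtIdx records which slot needs it.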
68 
69  LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
70 
71  void setMergeForward(bool V = true) { MergeForward = V; }
72  bool getMergeForward() const { return MergeForward; }
73 
74  void setSExtIdx(int V) { SExtIdx = V; }
75  int getSExtIdx() const { return SExtIdx; }
76 
77 } LdStPairFlags;
78 
79 struct AArch64LoadStoreOpt : public MachineFunctionPass {
80  static char ID;
81  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
82    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
83  }
84 
85  const AArch64InstrInfo *TII;
86  const TargetRegisterInfo *TRI;
87  const AArch64Subtarget *Subtarget;
88 
89  // Track which registers have been modified and used.
90  BitVector ModifiedRegs, UsedRegs;
91 
92  // Scan the instructions looking for a load/store that can be combined
93  // with the current instruction into a load/store pair.
94  // Return the matching instruction if one is found, else MBB->end().
95  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
96                                               LdStPairFlags &Flags,
97  unsigned Limit,
98  bool FindNarrowMerge);
99 
100  // Scan the instructions looking for a store that writes to the address from
101  // which the current load instruction reads. Return true if one is found.
102  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
103                         MachineBasicBlock::iterator &StoreI);
104 
105  // Merge the two narrow zero stores indicated into a single wider store.
106  MachineBasicBlock::iterator
107  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
108                        MachineBasicBlock::iterator MergeMI,
109                        const LdStPairFlags &Flags);
110 
111  // Merge the two instructions indicated into a single pair-wise instruction.
112  MachineBasicBlock::iterator
113  mergePairedInsns(MachineBasicBlock::iterator I,
114                   MachineBasicBlock::iterator Paired,
115                   const LdStPairFlags &Flags);
116 
117  // Promote the load that reads directly from the address stored to.
118  MachineBasicBlock::iterator
119  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
120                       MachineBasicBlock::iterator StoreI);
121 
122  // Scan the instruction list to find a base register update that can
123  // be combined with the current instruction (a load or store) using
124  // pre or post indexed addressing with writeback. Scan forwards.
125  MachineBasicBlock::iterator
126  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
127  int UnscaledOffset, unsigned Limit);
128 
129  // Scan the instruction list to find a base register update that can
130  // be combined with the current instruction (a load or store) using
131  // pre or post indexed addressing with writeback. Scan backwards.
132  MachineBasicBlock::iterator
133  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
134 
135  // Find an instruction that updates the base register of the ld/st
136  // instruction.
137  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
138  unsigned BaseReg, int Offset);
139 
140  // Merge a pre- or post-index base register update into a ld/st instruction.
141  MachineBasicBlock::iterator
142  mergeUpdateInsn(MachineBasicBlock::iterator I,
143  MachineBasicBlock::iterator Update, bool IsPreIdx);
144 
145  // Find and merge zero store instructions.
146  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
147 
148  // Find and pair ldr/str instructions.
149  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
150 
151  // Find and promote load instructions which read directly from store.
152  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
153 
154  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
155 
156  bool runOnMachineFunction(MachineFunction &Fn) override;
157 
158  MachineFunctionProperties getRequiredProperties() const override {
159    return MachineFunctionProperties().set(
160        MachineFunctionProperties::Property::NoVRegs);
161  }
162 
163  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
164 };
165 char AArch64LoadStoreOpt::ID = 0;
166 } // namespace
167 
168 INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
169  AARCH64_LOAD_STORE_OPT_NAME, false, false)
170 
171 static bool isNarrowStore(unsigned Opc) {
172  switch (Opc) {
173  default:
174  return false;
175  case AArch64::STRBBui:
176  case AArch64::STURBBi:
177  case AArch64::STRHHui:
178  case AArch64::STURHHi:
179  return true;
180  }
181 }
182 
183 // Scaling factor for unscaled load or store.
184 static int getMemScale(MachineInstr &MI) {
185   switch (MI.getOpcode()) {
186  default:
187  llvm_unreachable("Opcode has unknown scale!");
188  case AArch64::LDRBBui:
189  case AArch64::LDURBBi:
190  case AArch64::LDRSBWui:
191  case AArch64::LDURSBWi:
192  case AArch64::STRBBui:
193  case AArch64::STURBBi:
194  return 1;
195  case AArch64::LDRHHui:
196  case AArch64::LDURHHi:
197  case AArch64::LDRSHWui:
198  case AArch64::LDURSHWi:
199  case AArch64::STRHHui:
200  case AArch64::STURHHi:
201  return 2;
202  case AArch64::LDRSui:
203  case AArch64::LDURSi:
204  case AArch64::LDRSWui:
205  case AArch64::LDURSWi:
206  case AArch64::LDRWui:
207  case AArch64::LDURWi:
208  case AArch64::STRSui:
209  case AArch64::STURSi:
210  case AArch64::STRWui:
211  case AArch64::STURWi:
212  case AArch64::LDPSi:
213  case AArch64::LDPSWi:
214  case AArch64::LDPWi:
215  case AArch64::STPSi:
216  case AArch64::STPWi:
217  return 4;
218  case AArch64::LDRDui:
219  case AArch64::LDURDi:
220  case AArch64::LDRXui:
221  case AArch64::LDURXi:
222  case AArch64::STRDui:
223  case AArch64::STURDi:
224  case AArch64::STRXui:
225  case AArch64::STURXi:
226  case AArch64::LDPDi:
227  case AArch64::LDPXi:
228  case AArch64::STPDi:
229  case AArch64::STPXi:
230  return 8;
231  case AArch64::LDRQui:
232  case AArch64::LDURQi:
233  case AArch64::STRQui:
234  case AArch64::STURQi:
235  case AArch64::LDPQi:
236  case AArch64::STPQi:
237  return 16;
238  }
239 }
240 
241 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
242  bool *IsValidLdStrOpc = nullptr) {
243  if (IsValidLdStrOpc)
244  *IsValidLdStrOpc = true;
245  switch (Opc) {
246  default:
247  if (IsValidLdStrOpc)
248  *IsValidLdStrOpc = false;
249  return UINT_MAX;
250  case AArch64::STRDui:
251  case AArch64::STURDi:
252  case AArch64::STRQui:
253  case AArch64::STURQi:
254  case AArch64::STRBBui:
255  case AArch64::STURBBi:
256  case AArch64::STRHHui:
257  case AArch64::STURHHi:
258  case AArch64::STRWui:
259  case AArch64::STURWi:
260  case AArch64::STRXui:
261  case AArch64::STURXi:
262  case AArch64::LDRDui:
263  case AArch64::LDURDi:
264  case AArch64::LDRQui:
265  case AArch64::LDURQi:
266  case AArch64::LDRWui:
267  case AArch64::LDURWi:
268  case AArch64::LDRXui:
269  case AArch64::LDURXi:
270  case AArch64::STRSui:
271  case AArch64::STURSi:
272  case AArch64::LDRSui:
273  case AArch64::LDURSi:
274  return Opc;
275  case AArch64::LDRSWui:
276  return AArch64::LDRWui;
277  case AArch64::LDURSWi:
278  return AArch64::LDURWi;
279  }
280 }
281 
282 static unsigned getMatchingWideOpcode(unsigned Opc) {
283  switch (Opc) {
284  default:
285  llvm_unreachable("Opcode has no wide equivalent!");
286  case AArch64::STRBBui:
287  return AArch64::STRHHui;
288  case AArch64::STRHHui:
289  return AArch64::STRWui;
290  case AArch64::STURBBi:
291  return AArch64::STURHHi;
292  case AArch64::STURHHi:
293  return AArch64::STURWi;
294  case AArch64::STURWi:
295  return AArch64::STURXi;
296  case AArch64::STRWui:
297  return AArch64::STRXui;
298  }
299 }
300 
301 static unsigned getMatchingPairOpcode(unsigned Opc) {
302  switch (Opc) {
303  default:
304  llvm_unreachable("Opcode has no pairwise equivalent!");
305  case AArch64::STRSui:
306  case AArch64::STURSi:
307  return AArch64::STPSi;
308  case AArch64::STRDui:
309  case AArch64::STURDi:
310  return AArch64::STPDi;
311  case AArch64::STRQui:
312  case AArch64::STURQi:
313  return AArch64::STPQi;
314  case AArch64::STRWui:
315  case AArch64::STURWi:
316  return AArch64::STPWi;
317  case AArch64::STRXui:
318  case AArch64::STURXi:
319  return AArch64::STPXi;
320  case AArch64::LDRSui:
321  case AArch64::LDURSi:
322  return AArch64::LDPSi;
323  case AArch64::LDRDui:
324  case AArch64::LDURDi:
325  return AArch64::LDPDi;
326  case AArch64::LDRQui:
327  case AArch64::LDURQi:
328  return AArch64::LDPQi;
329  case AArch64::LDRWui:
330  case AArch64::LDURWi:
331  return AArch64::LDPWi;
332  case AArch64::LDRXui:
333  case AArch64::LDURXi:
334  return AArch64::LDPXi;
335  case AArch64::LDRSWui:
336  case AArch64::LDURSWi:
337  return AArch64::LDPSWi;
338  }
339 }
340 
341 static bool isMatchingStore(MachineInstr &LoadInst,
342                             MachineInstr &StoreInst) {
343   unsigned LdOpc = LoadInst.getOpcode();
344  unsigned StOpc = StoreInst.getOpcode();
345  switch (LdOpc) {
346  default:
347  llvm_unreachable("Unsupported load instruction!");
348  case AArch64::LDRBBui:
349  return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
350  StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
351  case AArch64::LDURBBi:
352  return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
353  StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
354  case AArch64::LDRHHui:
355  return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
356  StOpc == AArch64::STRXui;
357  case AArch64::LDURHHi:
358  return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
359  StOpc == AArch64::STURXi;
360  case AArch64::LDRWui:
361  return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
362  case AArch64::LDURWi:
363  return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
364  case AArch64::LDRXui:
365  return StOpc == AArch64::STRXui;
366  case AArch64::LDURXi:
367  return StOpc == AArch64::STURXi;
368  }
369 }
370 
371 static unsigned getPreIndexedOpcode(unsigned Opc) {
372  switch (Opc) {
373  default:
374  llvm_unreachable("Opcode has no pre-indexed equivalent!");
375  case AArch64::STRSui:
376  return AArch64::STRSpre;
377  case AArch64::STRDui:
378  return AArch64::STRDpre;
379  case AArch64::STRQui:
380  return AArch64::STRQpre;
381  case AArch64::STRBBui:
382  return AArch64::STRBBpre;
383  case AArch64::STRHHui:
384  return AArch64::STRHHpre;
385  case AArch64::STRWui:
386  return AArch64::STRWpre;
387  case AArch64::STRXui:
388  return AArch64::STRXpre;
389  case AArch64::LDRSui:
390  return AArch64::LDRSpre;
391  case AArch64::LDRDui:
392  return AArch64::LDRDpre;
393  case AArch64::LDRQui:
394  return AArch64::LDRQpre;
395  case AArch64::LDRBBui:
396  return AArch64::LDRBBpre;
397  case AArch64::LDRHHui:
398  return AArch64::LDRHHpre;
399  case AArch64::LDRWui:
400  return AArch64::LDRWpre;
401  case AArch64::LDRXui:
402  return AArch64::LDRXpre;
403  case AArch64::LDRSWui:
404  return AArch64::LDRSWpre;
405  case AArch64::LDPSi:
406  return AArch64::LDPSpre;
407  case AArch64::LDPSWi:
408  return AArch64::LDPSWpre;
409  case AArch64::LDPDi:
410  return AArch64::LDPDpre;
411  case AArch64::LDPQi:
412  return AArch64::LDPQpre;
413  case AArch64::LDPWi:
414  return AArch64::LDPWpre;
415  case AArch64::LDPXi:
416  return AArch64::LDPXpre;
417  case AArch64::STPSi:
418  return AArch64::STPSpre;
419  case AArch64::STPDi:
420  return AArch64::STPDpre;
421  case AArch64::STPQi:
422  return AArch64::STPQpre;
423  case AArch64::STPWi:
424  return AArch64::STPWpre;
425  case AArch64::STPXi:
426  return AArch64::STPXpre;
427  }
428 }
429 
430 static unsigned getPostIndexedOpcode(unsigned Opc) {
431  switch (Opc) {
432  default:
433  llvm_unreachable("Opcode has no post-indexed equivalent!");
434  case AArch64::STRSui:
435  return AArch64::STRSpost;
436  case AArch64::STRDui:
437  return AArch64::STRDpost;
438  case AArch64::STRQui:
439  return AArch64::STRQpost;
440  case AArch64::STRBBui:
441  return AArch64::STRBBpost;
442  case AArch64::STRHHui:
443  return AArch64::STRHHpost;
444  case AArch64::STRWui:
445  return AArch64::STRWpost;
446  case AArch64::STRXui:
447  return AArch64::STRXpost;
448  case AArch64::LDRSui:
449  return AArch64::LDRSpost;
450  case AArch64::LDRDui:
451  return AArch64::LDRDpost;
452  case AArch64::LDRQui:
453  return AArch64::LDRQpost;
454  case AArch64::LDRBBui:
455  return AArch64::LDRBBpost;
456  case AArch64::LDRHHui:
457  return AArch64::LDRHHpost;
458  case AArch64::LDRWui:
459  return AArch64::LDRWpost;
460  case AArch64::LDRXui:
461  return AArch64::LDRXpost;
462  case AArch64::LDRSWui:
463  return AArch64::LDRSWpost;
464  case AArch64::LDPSi:
465  return AArch64::LDPSpost;
466  case AArch64::LDPSWi:
467  return AArch64::LDPSWpost;
468  case AArch64::LDPDi:
469  return AArch64::LDPDpost;
470  case AArch64::LDPQi:
471  return AArch64::LDPQpost;
472  case AArch64::LDPWi:
473  return AArch64::LDPWpost;
474  case AArch64::LDPXi:
475  return AArch64::LDPXpost;
476  case AArch64::STPSi:
477  return AArch64::STPSpost;
478  case AArch64::STPDi:
479  return AArch64::STPDpost;
480  case AArch64::STPQi:
481  return AArch64::STPQpost;
482  case AArch64::STPWi:
483  return AArch64::STPWpost;
484  case AArch64::STPXi:
485  return AArch64::STPXpost;
486  }
487 }
488 
489 static bool isPairedLdSt(const MachineInstr &MI) {
490  switch (MI.getOpcode()) {
491  default:
492  return false;
493  case AArch64::LDPSi:
494  case AArch64::LDPSWi:
495  case AArch64::LDPDi:
496  case AArch64::LDPQi:
497  case AArch64::LDPWi:
498  case AArch64::LDPXi:
499  case AArch64::STPSi:
500  case AArch64::STPDi:
501  case AArch64::STPQi:
502  case AArch64::STPWi:
503  case AArch64::STPXi:
504  return true;
505  }
506 }
507 
508 static const MachineOperand &getLdStRegOp(const MachineInstr &MI,
509                                           unsigned PairedRegOp = 0) {
510  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
511  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
512  return MI.getOperand(Idx);
513 }
514 
515 static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
516   unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
517  return MI.getOperand(Idx);
518 }
519 
520 static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
521   unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
522  return MI.getOperand(Idx);
523 }
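// Illustration of the operand layouts these helpers rely on:
//   LDRXui  x0, [x1, #4]      -> operands (Rt, Rn, imm)      = (0, 1, 2)
//   LDPXi   x0, x1, [x2, #2]  -> operands (Rt, Rt2, Rn, imm) = (0, 1, 2, 3)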
524 
525 static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
526                                   MachineInstr &StoreInst,
527                                   const AArch64InstrInfo *TII) {
528  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
529  int LoadSize = getMemScale(LoadInst);
530  int StoreSize = getMemScale(StoreInst);
531  int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
532  ? getLdStOffsetOp(StoreInst).getImm()
533  : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
534  int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
535  ? getLdStOffsetOp(LoadInst).getImm()
536  : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
537  return (UnscaledStOffset <= UnscaledLdOffset) &&
538  (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
539 }
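// Worked example: "str w1, [x0, #4]" covers bytes [4, 8) and
// "ldrh w2, [x0, #6]" reads bytes [6, 8), so the load is fully contained in
// the store and can later be rewritten as a bitfield extract of w1.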
540 
541 static bool isPromotableZeroStoreInst(MachineInstr &MI) {
542   unsigned Opc = MI.getOpcode();
543  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
544  isNarrowStore(Opc)) &&
545  getLdStRegOp(MI).getReg() == AArch64::WZR;
546 }
547 
548 MachineBasicBlock::iterator
549 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
550                                            MachineBasicBlock::iterator MergeMI,
551                                            const LdStPairFlags &Flags) {
552   assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
553          "Expected promotable zero stores.");
554 
555   MachineBasicBlock::iterator NextI = I;
556   ++NextI;
557   // If NextI is the second of the two instructions to be merged, we need
558   // to skip one further. Either way, the merge will invalidate the iterator,
559   // and we don't need to scan the new instruction, as it's a pairwise
560   // instruction, which we're not considering for further action anyway.
561  if (NextI == MergeMI)
562  ++NextI;
563 
564  unsigned Opc = I->getOpcode();
565  bool IsScaled = !TII->isUnscaledLdSt(Opc);
566  int OffsetStride = IsScaled ? 1 : getMemScale(*I);
567 
568  bool MergeForward = Flags.getMergeForward();
569  // Insert our new paired instruction after whichever of the paired
570  // instructions MergeForward indicates.
571  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
572   // MergeForward also determines from which instruction we copy the base
573   // register operand, so that the flags stay compatible with the input code.
574  const MachineOperand &BaseRegOp =
575  MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
576 
577  // Which register is Rt and which is Rt2 depends on the offset order.
578  MachineInstr *RtMI;
579  if (getLdStOffsetOp(*I).getImm() ==
580  getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
581  RtMI = &*MergeMI;
582  else
583  RtMI = &*I;
584 
585  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
586  // Change the scaled offset from small to large type.
587  if (IsScaled) {
588  assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
589  OffsetImm /= 2;
590  }
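  // Example of the rescaling above: "strh wzr, [x0, #4]" and
  // "strh wzr, [x0, #6]" carry STRHHui immediates 2 and 3 (units of 2 bytes);
  // the merged STRWui uses immediate 2/2 = 1, i.e. "str wzr, [x0, #4]".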
591 
592  // Construct the new instruction.
593  DebugLoc DL = I->getDebugLoc();
594   MachineBasicBlock *MBB = I->getParent();
595   MachineInstrBuilder MIB;
596   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
597  .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
598  .addOperand(BaseRegOp)
599  .addImm(OffsetImm)
600  .setMemRefs(I->mergeMemRefsWith(*MergeMI));
601  (void)MIB;
602 
603  DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
604  DEBUG(I->print(dbgs()));
605  DEBUG(dbgs() << " ");
606  DEBUG(MergeMI->print(dbgs()));
607  DEBUG(dbgs() << " with instruction:\n ");
608  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
609  DEBUG(dbgs() << "\n");
610 
611  // Erase the old instructions.
612  I->eraseFromParent();
613  MergeMI->eraseFromParent();
614  return NextI;
615 }
616 
617 MachineBasicBlock::iterator
618 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
619                                       MachineBasicBlock::iterator Paired,
620                                       const LdStPairFlags &Flags) {
621   MachineBasicBlock::iterator NextI = I;
622   ++NextI;
623   // If NextI is the second of the two instructions to be merged, we need
624   // to skip one further. Either way, the merge will invalidate the iterator,
625   // and we don't need to scan the new instruction, as it's a pairwise
626   // instruction, which we're not considering for further action anyway.
627  if (NextI == Paired)
628  ++NextI;
629 
630  int SExtIdx = Flags.getSExtIdx();
631  unsigned Opc =
632  SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
633  bool IsUnscaled = TII->isUnscaledLdSt(Opc);
634  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
635 
636  bool MergeForward = Flags.getMergeForward();
637  // Insert our new paired instruction after whichever of the paired
638  // instructions MergeForward indicates.
639  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
640   // MergeForward also determines from which instruction we copy the base
641   // register operand, so that the flags stay compatible with the input code.
642  const MachineOperand &BaseRegOp =
643  MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
644 
645  int Offset = getLdStOffsetOp(*I).getImm();
646  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
647  bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
648  if (IsUnscaled != PairedIsUnscaled) {
649  // We're trying to pair instructions that differ in how they are scaled. If
650  // I is scaled then scale the offset of Paired accordingly. Otherwise, do
651  // the opposite (i.e., make Paired's offset unscaled).
652  int MemSize = getMemScale(*Paired);
653  if (PairedIsUnscaled) {
654  // If the unscaled offset isn't a multiple of the MemSize, we can't
655  // pair the operations together.
656  assert(!(PairedOffset % getMemScale(*Paired)) &&
657  "Offset should be a multiple of the stride!");
658  PairedOffset /= MemSize;
659  } else {
660  PairedOffset *= MemSize;
661  }
662  }
663 
664  // Which register is Rt and which is Rt2 depends on the offset order.
665  MachineInstr *RtMI, *Rt2MI;
666  if (Offset == PairedOffset + OffsetStride) {
667  RtMI = &*Paired;
668  Rt2MI = &*I;
669  // Here we swapped the assumption made for SExtIdx.
670  // I.e., we turn ldp I, Paired into ldp Paired, I.
671  // Update the index accordingly.
672  if (SExtIdx != -1)
673  SExtIdx = (SExtIdx + 1) % 2;
674  } else {
675  RtMI = &*I;
676  Rt2MI = &*Paired;
677  }
678  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
679  // Scale the immediate offset, if necessary.
680  if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
681  assert(!(OffsetImm % getMemScale(*RtMI)) &&
682  "Unscaled offset cannot be scaled.");
683  OffsetImm /= getMemScale(*RtMI);
684  }
685 
686   // Construct the new instruction.
687   MachineInstrBuilder MIB;
688   DebugLoc DL = I->getDebugLoc();
689  MachineBasicBlock *MBB = I->getParent();
690  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
691  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
692  // Kill flags may become invalid when moving stores for pairing.
693  if (RegOp0.isUse()) {
694  if (!MergeForward) {
695  // Clear kill flags on store if moving upwards. Example:
696  // STRWui %w0, ...
697  // USE %w1
698  // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
699  RegOp0.setIsKill(false);
700  RegOp1.setIsKill(false);
701  } else {
702  // Clear kill flags of the first stores register. Example:
703  // STRWui %w1, ...
704  // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
705  // STRW %w0
706  unsigned Reg = getLdStRegOp(*I).getReg();
707  for (MachineInstr &MI : make_range(std::next(I), Paired))
708  MI.clearRegisterKills(Reg, TRI);
709  }
710  }
711  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
712  .addOperand(RegOp0)
713  .addOperand(RegOp1)
714  .addOperand(BaseRegOp)
715  .addImm(OffsetImm)
716  .setMemRefs(I->mergeMemRefsWith(*Paired));
717 
718  (void)MIB;
719 
720  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
721  DEBUG(I->print(dbgs()));
722  DEBUG(dbgs() << " ");
723  DEBUG(Paired->print(dbgs()));
724  DEBUG(dbgs() << " with instruction:\n ");
725  if (SExtIdx != -1) {
726  // Generate the sign extension for the proper result of the ldp.
727  // I.e., with X1, that would be:
728  // %W1<def> = KILL %W1, %X1<imp-def>
729  // %X1<def> = SBFMXri %X1<kill>, 0, 31
730  MachineOperand &DstMO = MIB->getOperand(SExtIdx);
731  // Right now, DstMO has the extended register, since it comes from an
732  // extended opcode.
733  unsigned DstRegX = DstMO.getReg();
734  // Get the W variant of that register.
735  unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
736  // Update the result of LDP to use the W instead of the X variant.
737  DstMO.setReg(DstRegW);
738  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
739  DEBUG(dbgs() << "\n");
740  // Make the machine verifier happy by providing a definition for
741  // the X register.
742  // Insert this definition right after the generated LDP, i.e., before
743  // InsertionPoint.
744  MachineInstrBuilder MIBKill =
745  BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
746  .addReg(DstRegW)
747  .addReg(DstRegX, RegState::Define);
748  MIBKill->getOperand(2).setImplicit();
749  // Create the sign extension.
750  MachineInstrBuilder MIBSXTW =
751  BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
752  .addReg(DstRegX)
753  .addImm(0)
754  .addImm(31);
755  (void)MIBSXTW;
756  DEBUG(dbgs() << " Extend operand:\n ");
757  DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
758  } else {
759  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
760  }
761  DEBUG(dbgs() << "\n");
762 
763  // Erase the old instructions.
764  I->eraseFromParent();
765  Paired->eraseFromParent();
766 
767  return NextI;
768 }
769 
770 MachineBasicBlock::iterator
771 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
772                                           MachineBasicBlock::iterator StoreI) {
773   MachineBasicBlock::iterator NextI = LoadI;
774  ++NextI;
775 
776  int LoadSize = getMemScale(*LoadI);
777  int StoreSize = getMemScale(*StoreI);
778  unsigned LdRt = getLdStRegOp(*LoadI).getReg();
779  unsigned StRt = getLdStRegOp(*StoreI).getReg();
780  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
781 
782  assert((IsStoreXReg ||
783  TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
784  "Unexpected RegClass");
785 
786  MachineInstr *BitExtMI;
787  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
788     // Remove the load if the destination register of the load is the same
789     // register that holds the stored value.
790  if (StRt == LdRt && LoadSize == 8) {
791  StoreI->clearRegisterKills(StRt, TRI);
792  DEBUG(dbgs() << "Remove load instruction:\n ");
793  DEBUG(LoadI->print(dbgs()));
794  DEBUG(dbgs() << "\n");
795  LoadI->eraseFromParent();
796  return NextI;
797  }
798     // Replace the load with a mov if the load and the store have the same size.
799  BitExtMI =
800  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
801  TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
802  .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
803             .addReg(StRt)
804             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
805   } else {
806  // FIXME: Currently we disable this transformation in big-endian targets as
807  // performance and correctness are verified only in little-endian.
808  if (!Subtarget->isLittleEndian())
809  return NextI;
810  bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
811  assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
812  "Unsupported ld/st match");
813  assert(LoadSize <= StoreSize && "Invalid load size");
814  int UnscaledLdOffset = IsUnscaled
815  ? getLdStOffsetOp(*LoadI).getImm()
816  : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
817  int UnscaledStOffset = IsUnscaled
818  ? getLdStOffsetOp(*StoreI).getImm()
819  : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
820  int Width = LoadSize * 8;
821  int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
822  int Imms = Immr + Width - 1;
823  unsigned DestReg = IsStoreXReg
824  ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
825  &AArch64::GPR64RegClass)
826  : LdRt;
827 
828  assert((UnscaledLdOffset >= UnscaledStOffset &&
829  (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
830  "Invalid offset");
831 
832  Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
833  Imms = Immr + Width - 1;
834  if (UnscaledLdOffset == UnscaledStOffset) {
835  uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
836  | ((Immr) << 6) // immr
837  | ((Imms) << 0) // imms
838  ;
839 
840  BitExtMI =
841  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
842  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
843  DestReg)
844  .addReg(StRt)
845  .addImm(AndMaskEncoded);
846  } else {
847  BitExtMI =
848  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
849  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
850  DestReg)
851  .addReg(StRt)
852  .addImm(Immr)
853  .addImm(Imms);
854  }
855  }
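  // Worked example of the bitfield forms above, assuming "strh w1, [x0]":
  //   ldrb w2, [x0]     -> and  w2, w1, #0xff   (N=0, immr=0, imms=7)
  //   ldrb w2, [x0, #1] -> ubfm w2, w1, #8, #15 (i.e. ubfx w2, w1, #8, #8)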
856 
857  // Clear kill flags between store and load.
858  for (MachineInstr &MI : make_range(StoreI->getIterator(),
859  BitExtMI->getIterator()))
860  MI.clearRegisterKills(StRt, TRI);
861 
862  DEBUG(dbgs() << "Promoting load by replacing :\n ");
863  DEBUG(StoreI->print(dbgs()));
864  DEBUG(dbgs() << " ");
865  DEBUG(LoadI->print(dbgs()));
866  DEBUG(dbgs() << " with instructions:\n ");
867  DEBUG(StoreI->print(dbgs()));
868  DEBUG(dbgs() << " ");
869  DEBUG((BitExtMI)->print(dbgs()));
870  DEBUG(dbgs() << "\n");
871 
872  // Erase the old instructions.
873  LoadI->eraseFromParent();
874  return NextI;
875 }
876 
877 /// trackRegDefsUses - Remember what registers the specified instruction uses
878 /// and modifies.
879 static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
880  BitVector &UsedRegs,
881  const TargetRegisterInfo *TRI) {
882  for (const MachineOperand &MO : MI.operands()) {
883  if (MO.isRegMask())
884  ModifiedRegs.setBitsNotInMask(MO.getRegMask());
885 
886  if (!MO.isReg())
887  continue;
888  unsigned Reg = MO.getReg();
889  if (!Reg)
890  continue;
891  if (MO.isDef()) {
892  // WZR/XZR are not modified even when used as a destination register.
893  if (Reg != AArch64::WZR && Reg != AArch64::XZR)
894  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
895  ModifiedRegs.set(*AI);
896  } else {
897  assert(MO.isUse() && "Reg operand not a def and not a use?!?");
898  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
899  UsedRegs.set(*AI);
900  }
901  }
902 }
903 
904 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
905  // Convert the byte-offset used by unscaled into an "element" offset used
906  // by the scaled pair load/store instructions.
907  if (IsUnscaled) {
908  // If the byte-offset isn't a multiple of the stride, there's no point
909  // trying to match it.
910  if (Offset % OffsetStride)
911  return false;
912  Offset /= OffsetStride;
913  }
914  return Offset <= 63 && Offset >= -64;
915 }
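// The paired forms use a signed 7-bit element offset, so e.g. an LDP/STP of
// X registers can address byte offsets from -512 to 504 (i.e. -64*8 to 63*8).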
916 
917 // Do alignment, specialized to power of 2 and for signed ints,
918 // avoiding having to do a C-style cast from uint64_t to int when
919 // using alignTo from include/llvm/Support/MathExtras.h.
920 // FIXME: Move this function to include/MathExtras.h?
921 static int alignTo(int Num, int PowOf2) {
922  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
923 }
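// For example: alignTo(5, 4) == 8, alignTo(8, 4) == 8, and alignTo(-5, 4) == -4
// (rounding towards positive infinity), matching llvm::alignTo for the
// non-negative cases while staying in signed arithmetic.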
924 
925 static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
926  const AArch64InstrInfo *TII) {
927  // One of the instructions must modify memory.
928  if (!MIa.mayStore() && !MIb.mayStore())
929  return false;
930 
931  // Both instructions must be memory operations.
932  if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
933  return false;
934 
935  return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
936 }
937 
938 static bool mayAlias(MachineInstr &MIa,
939                      SmallVectorImpl<MachineInstr *> &MemInsns,
940                      const AArch64InstrInfo *TII) {
941  for (MachineInstr *MIb : MemInsns)
942  if (mayAlias(MIa, *MIb, TII))
943  return true;
944 
945  return false;
946 }
947 
948 bool AArch64LoadStoreOpt::findMatchingStore(
949  MachineBasicBlock::iterator I, unsigned Limit,
950  MachineBasicBlock::iterator &StoreI) {
951   MachineBasicBlock::iterator B = I->getParent()->begin();
952   MachineBasicBlock::iterator MBBI = I;
953   MachineInstr &LoadMI = *I;
954  unsigned BaseReg = getLdStBaseOp(LoadMI).getReg();
955 
956  // If the load is the first instruction in the block, there's obviously
957  // not any matching store.
958  if (MBBI == B)
959  return false;
960 
961  // Track which registers have been modified and used between the first insn
962  // and the second insn.
963  ModifiedRegs.reset();
964  UsedRegs.reset();
965 
966  unsigned Count = 0;
967  do {
968  --MBBI;
969  MachineInstr &MI = *MBBI;
970 
971  // Don't count transient instructions towards the search limit since there
972  // may be different numbers of them if e.g. debug information is present.
973  if (!MI.isTransient())
974  ++Count;
975 
976  // If the load instruction reads directly from the address to which the
977  // store instruction writes and the stored value is not modified, we can
978  // promote the load. Since we do not handle stores with pre-/post-index,
979  // it's unnecessary to check if BaseReg is modified by the store itself.
980  if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
981  BaseReg == getLdStBaseOp(MI).getReg() &&
982  isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
983  !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
984  StoreI = MBBI;
985  return true;
986  }
987 
988  if (MI.isCall())
989  return false;
990 
991  // Update modified / uses register lists.
992  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
993 
994  // Otherwise, if the base register is modified, we have no match, so
995  // return early.
996  if (ModifiedRegs[BaseReg])
997  return false;
998 
999  // If we encounter a store aliased with the load, return early.
1000  if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
1001  return false;
1002  } while (MBBI != B && Count < Limit);
1003  return false;
1004 }
1005 
1006 // Returns true if FirstMI and MI are candidates for merging or pairing.
1007 // Otherwise, returns false.
1008 static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1009                                        LdStPairFlags &Flags,
1010  const AArch64InstrInfo *TII) {
1011  // If this is volatile or if pairing is suppressed, not a candidate.
1012  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1013  return false;
1014 
1015  // We should have already checked FirstMI for pair suppression and volatility.
1016  assert(!FirstMI.hasOrderedMemoryRef() &&
1017  !TII->isLdStPairSuppressed(FirstMI) &&
1018  "FirstMI shouldn't get here if either of these checks are true.");
1019 
1020  unsigned OpcA = FirstMI.getOpcode();
1021  unsigned OpcB = MI.getOpcode();
1022 
1023  // Opcodes match: nothing more to check.
1024  if (OpcA == OpcB)
1025  return true;
1026 
1027  // Try to match a sign-extended load/store with a zero-extended load/store.
1028  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1029  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1030  assert(IsValidLdStrOpc &&
1031  "Given Opc should be a Load or Store with an immediate");
1032  // OpcA will be the first instruction in the pair.
1033  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1034  Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
1035  return true;
1036  }
1037 
1038  // If the second instruction isn't even a mergable/pairable load/store, bail
1039  // out.
1040  if (!PairIsValidLdStrOpc)
1041  return false;
1042 
1043  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
1044  // offsets.
1045  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1046  return false;
1047 
1048  // Try to match an unscaled load/store with a scaled load/store.
1049   return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
1050          getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1051 
1052  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1053 }
1054 
1055 /// Scan the instructions looking for a load/store that can be combined with the
1056 /// current instruction into a wider equivalent or a load/store pair.
1057 MachineBasicBlock::iterator
1058 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1059  LdStPairFlags &Flags, unsigned Limit,
1060  bool FindNarrowMerge) {
1061   MachineBasicBlock::iterator E = I->getParent()->end();
1062   MachineBasicBlock::iterator MBBI = I;
1063   MachineInstr &FirstMI = *I;
1064  ++MBBI;
1065 
1066  bool MayLoad = FirstMI.mayLoad();
1067  bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
1068  unsigned Reg = getLdStRegOp(FirstMI).getReg();
1069  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
1070  int Offset = getLdStOffsetOp(FirstMI).getImm();
1071  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
1072  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1073 
1074  // Track which registers have been modified and used between the first insn
1075  // (inclusive) and the second insn.
1076  ModifiedRegs.reset();
1077  UsedRegs.reset();
1078 
1079   // Remember any instructions that read/write memory between FirstMI and MI.
1080   SmallVector<MachineInstr *, 4> MemInsns;
1081 
1082  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
1083  MachineInstr &MI = *MBBI;
1084 
1085  // Don't count transient instructions towards the search limit since there
1086  // may be different numbers of them if e.g. debug information is present.
1087  if (!MI.isTransient())
1088  ++Count;
1089 
1090  Flags.setSExtIdx(-1);
1091  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1092  getLdStOffsetOp(MI).isImm()) {
1093  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1094  // If we've found another instruction with the same opcode, check to see
1095  // if the base and offset are compatible with our starting instruction.
1096  // These instructions all have scaled immediate operands, so we just
1097  // check for +1/-1. Make sure to check the new instruction offset is
1098  // actually an immediate and not a symbolic reference destined for
1099  // a relocation.
1100  unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
1101  int MIOffset = getLdStOffsetOp(MI).getImm();
1102  bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
1103  if (IsUnscaled != MIIsUnscaled) {
1104  // We're trying to pair instructions that differ in how they are scaled.
1105  // If FirstMI is scaled then scale the offset of MI accordingly.
1106  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1107  int MemSize = getMemScale(MI);
1108  if (MIIsUnscaled) {
1109  // If the unscaled offset isn't a multiple of the MemSize, we can't
1110  // pair the operations together: bail and keep looking.
1111  if (MIOffset % MemSize) {
1112  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1113  MemInsns.push_back(&MI);
1114  continue;
1115  }
1116  MIOffset /= MemSize;
1117  } else {
1118  MIOffset *= MemSize;
1119  }
1120  }
1121 
1122  if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
1123  (Offset + OffsetStride == MIOffset))) {
1124  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1125  if (FindNarrowMerge) {
1126  // If the alignment requirements of the scaled wide load/store
1127  // instruction can't express the offset of the scaled narrow input,
1128  // bail and keep looking. For promotable zero stores, allow only when
1129  // the stored value is the same (i.e., WZR).
1130  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1131  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1132  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1133  MemInsns.push_back(&MI);
1134  continue;
1135  }
1136  } else {
1137  // Pairwise instructions have a 7-bit signed offset field. Single
1138  // insns have a 12-bit unsigned offset field. If the resultant
1139  // immediate offset of merging these instructions is out of range for
1140  // a pairwise instruction, bail and keep looking.
1141  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1142  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1143  MemInsns.push_back(&MI);
1144  continue;
1145  }
1146  // If the alignment requirements of the paired (scaled) instruction
1147  // can't express the offset of the unscaled input, bail and keep
1148  // looking.
1149  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1150  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1151  MemInsns.push_back(&MI);
1152  continue;
1153  }
1154  }
1155  // If the destination register of the loads is the same register, bail
1156  // and keep looking. A load-pair instruction with both destination
1157  // registers the same is UNPREDICTABLE and will result in an exception.
1158  if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
1159  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1160  MemInsns.push_back(&MI);
1161  continue;
1162  }
1163 
1164  // If the Rt of the second instruction was not modified or used between
1165  // the two instructions and none of the instructions between the second
1166  // and first alias with the second, we can combine the second into the
1167  // first.
1168  if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
1169  !(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
1170  !mayAlias(MI, MemInsns, TII)) {
1171  Flags.setMergeForward(false);
1172  return MBBI;
1173  }
1174 
1175  // Likewise, if the Rt of the first instruction is not modified or used
1176  // between the two instructions and none of the instructions between the
1177  // first and the second alias with the first, we can combine the first
1178  // into the second.
1179  if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
1180  !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
1181  !mayAlias(FirstMI, MemInsns, TII)) {
1182  Flags.setMergeForward(true);
1183  return MBBI;
1184  }
1185  // Unable to combine these instructions due to interference in between.
1186  // Keep looking.
1187  }
1188  }
1189 
1190   // The instruction wasn't a matching load or store. Stop searching if we
1191   // encounter a call instruction that might modify memory.
1192  if (MI.isCall())
1193  return E;
1194 
1195  // Update modified / uses register lists.
1196  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1197 
1198  // Otherwise, if the base register is modified, we have no match, so
1199  // return early.
1200  if (ModifiedRegs[BaseReg])
1201  return E;
1202 
1203  // Update list of instructions that read/write memory.
1204  if (MI.mayLoadOrStore())
1205  MemInsns.push_back(&MI);
1206  }
1207  return E;
1208 }
1209 
1210 MachineBasicBlock::iterator
1211 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1212                                      MachineBasicBlock::iterator Update,
1213                                      bool IsPreIdx) {
1214  assert((Update->getOpcode() == AArch64::ADDXri ||
1215  Update->getOpcode() == AArch64::SUBXri) &&
1216          "Unexpected base register update instruction to merge!");
1217   MachineBasicBlock::iterator NextI = I;
1218   // Return the instruction following the merged instruction, which is
1219  // the instruction following our unmerged load. Unless that's the add/sub
1220  // instruction we're merging, in which case it's the one after that.
1221  if (++NextI == Update)
1222  ++NextI;
1223 
1224  int Value = Update->getOperand(2).getImm();
1225  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1226  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1227  if (Update->getOpcode() == AArch64::SUBXri)
1228  Value = -Value;
1229 
1230  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1231  : getPostIndexedOpcode(I->getOpcode());
1232  MachineInstrBuilder MIB;
1233  if (!isPairedLdSt(*I)) {
1234  // Non-paired instruction.
1235  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1236  .addOperand(getLdStRegOp(*Update))
1237               .addOperand(getLdStRegOp(*I))
1238               .addOperand(getLdStBaseOp(*I))
1239               .addImm(Value)
1240  .setMemRefs(I->memoperands_begin(), I->memoperands_end());
1241  } else {
1242  // Paired instruction.
1243  int Scale = getMemScale(*I);
1244  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1245  .addOperand(getLdStRegOp(*Update))
1246  .addOperand(getLdStRegOp(*I, 0))
1247               .addOperand(getLdStRegOp(*I, 1))
1248               .addOperand(getLdStBaseOp(*I))
1249               .addImm(Value / Scale)
1250  .setMemRefs(I->memoperands_begin(), I->memoperands_end());
1251  }
1252  (void)MIB;
1253 
1254  if (IsPreIdx)
1255  DEBUG(dbgs() << "Creating pre-indexed load/store.");
1256  else
1257  DEBUG(dbgs() << "Creating post-indexed load/store.");
1258  DEBUG(dbgs() << " Replacing instructions:\n ");
1259  DEBUG(I->print(dbgs()));
1260  DEBUG(dbgs() << " ");
1261  DEBUG(Update->print(dbgs()));
1262  DEBUG(dbgs() << " with instruction:\n ");
1263  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1264  DEBUG(dbgs() << "\n");
1265 
1266  // Erase the old instructions for the block.
1267  I->eraseFromParent();
1268  Update->eraseFromParent();
1269 
1270  return NextI;
1271 }
1272 
1273 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1274  MachineInstr &MI,
1275  unsigned BaseReg, int Offset) {
1276  switch (MI.getOpcode()) {
1277  default:
1278  break;
1279  case AArch64::SUBXri:
1280  case AArch64::ADDXri:
1281  // Make sure it's a vanilla immediate operand, not a relocation or
1282  // anything else we can't handle.
1283  if (!MI.getOperand(2).isImm())
1284  break;
1285     // Watch out for 1 << 12 shifted value.
1286     if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1287       break;
1288 
1289  // The update instruction source and destination register must be the
1290  // same as the load/store base register.
1291  if (MI.getOperand(0).getReg() != BaseReg ||
1292  MI.getOperand(1).getReg() != BaseReg)
1293  break;
1294 
1295  bool IsPairedInsn = isPairedLdSt(MemMI);
1296  int UpdateOffset = MI.getOperand(2).getImm();
1297  if (MI.getOpcode() == AArch64::SUBXri)
1298  UpdateOffset = -UpdateOffset;
1299 
1300  // For non-paired load/store instructions, the immediate must fit in a
1301  // signed 9-bit integer.
1302  if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
1303  break;
1304 
1305  // For paired load/store instructions, the immediate must be a multiple of
1306  // the scaling factor. The scaled offset must also fit into a signed 7-bit
1307  // integer.
1308  if (IsPairedInsn) {
1309  int Scale = getMemScale(MemMI);
1310  if (UpdateOffset % Scale != 0)
1311  break;
1312 
1313  int ScaledOffset = UpdateOffset / Scale;
1314  if (ScaledOffset > 63 || ScaledOffset < -64)
1315  break;
1316  }
1317 
1318  // If we have a non-zero Offset, we check that it matches the amount
1319  // we're adding to the register.
1320  if (!Offset || Offset == UpdateOffset)
1321  return true;
1322  break;
1323  }
1324  return false;
1325 }
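// For example, with "ldp x0, x1, [x2]" (scale 8): "add x2, x2, #16" is a
// matching update (scaled offset 2), while "add x2, x2, #4" is rejected
// (not a multiple of the scale) and "add x2, x2, #1024" is out of the
// signed 7-bit scaled range.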
1326 
1327 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1328  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1329  MachineBasicBlock::iterator E = I->getParent()->end();
1330   MachineInstr &MemMI = *I;
1331   MachineBasicBlock::iterator MBBI = I;
1332 
1333  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
1334  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
1335 
1336  // Scan forward looking for post-index opportunities. Updating instructions
1337  // can't be formed if the memory instruction doesn't have the offset we're
1338  // looking for.
1339  if (MIUnscaledOffset != UnscaledOffset)
1340  return E;
1341 
1342  // If the base register overlaps a destination register, we can't
1343  // merge the update.
1344  bool IsPairedInsn = isPairedLdSt(MemMI);
1345  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1346  unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
1347  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1348  return E;
1349  }
1350 
1351  // Track which registers have been modified and used between the first insn
1352  // (inclusive) and the second insn.
1353  ModifiedRegs.reset();
1354  UsedRegs.reset();
1355  ++MBBI;
1356  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
1357  MachineInstr &MI = *MBBI;
1358 
1359  // Don't count transient instructions towards the search limit since there
1360  // may be different numbers of them if e.g. debug information is present.
1361  if (!MI.isTransient())
1362  ++Count;
1363 
1364  // If we found a match, return it.
1365  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1366  return MBBI;
1367 
1368  // Update the status of what the instruction clobbered and used.
1369  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1370 
1371  // Otherwise, if the base register is used or modified, we have no match, so
1372  // return early.
1373  if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
1374  return E;
1375  }
1376  return E;
1377 }
1378 
1379 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1380  MachineBasicBlock::iterator I, unsigned Limit) {
1381  MachineBasicBlock::iterator B = I->getParent()->begin();
1382  MachineBasicBlock::iterator E = I->getParent()->end();
1383   MachineInstr &MemMI = *I;
1384   MachineBasicBlock::iterator MBBI = I;
1385 
1386  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
1387  int Offset = getLdStOffsetOp(MemMI).getImm();
1388 
1389  // If the load/store is the first instruction in the block, there's obviously
1390  // not any matching update. Ditto if the memory offset isn't zero.
1391  if (MBBI == B || Offset != 0)
1392  return E;
1393  // If the base register overlaps a destination register, we can't
1394  // merge the update.
1395  bool IsPairedInsn = isPairedLdSt(MemMI);
1396  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1397  unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
1398  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1399  return E;
1400  }
1401 
1402  // Track which registers have been modified and used between the first insn
1403  // (inclusive) and the second insn.
1404  ModifiedRegs.reset();
1405  UsedRegs.reset();
1406  unsigned Count = 0;
1407  do {
1408  --MBBI;
1409  MachineInstr &MI = *MBBI;
1410 
1411  // Don't count transient instructions towards the search limit since there
1412  // may be different numbers of them if e.g. debug information is present.
1413  if (!MI.isTransient())
1414  ++Count;
1415 
1416  // If we found a match, return it.
1417  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
1418  return MBBI;
1419 
1420  // Update the status of what the instruction clobbered and used.
1421  trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
1422 
1423  // Otherwise, if the base register is used or modified, we have no match, so
1424  // return early.
1425  if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
1426  return E;
1427  } while (MBBI != B && Count < Limit);
1428  return E;
1429 }
1430 
1431 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
1432     MachineBasicBlock::iterator &MBBI) {
1433   MachineInstr &MI = *MBBI;
1434  // If this is a volatile load, don't mess with it.
1435  if (MI.hasOrderedMemoryRef())
1436  return false;
1437 
1438  // Make sure this is a reg+imm.
1439  // FIXME: It is possible to extend it to handle reg+reg cases.
1440  if (!getLdStOffsetOp(MI).isImm())
1441  return false;
1442 
1443   // Look backward up to LdStLimit instructions.
1444   MachineBasicBlock::iterator StoreI;
1445   if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
1446  ++NumLoadsFromStoresPromoted;
1447  // Promote the load. Keeping the iterator straight is a
1448  // pain, so we let the merge routine tell us what the next instruction
1449  // is after it's done mucking about.
1450  MBBI = promoteLoadFromStore(MBBI, StoreI);
1451  return true;
1452  }
1453  return false;
1454 }
1455 
1456 // Merge adjacent zero stores into a wider store.
1457 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
1458     MachineBasicBlock::iterator &MBBI) {
1459   assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
1460   MachineInstr &MI = *MBBI;
1461   MachineBasicBlock::iterator E = MI.getParent()->end();
1462 
1463  if (!TII->isCandidateToMergeOrPair(MI))
1464  return false;
1465 
1466  // Look ahead up to LdStLimit instructions for a mergable instruction.
1467  LdStPairFlags Flags;
1468  MachineBasicBlock::iterator MergeMI =
1469  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
1470  if (MergeMI != E) {
1471  ++NumZeroStoresPromoted;
1472 
1473  // Keeping the iterator straight is a pain, so we let the merge routine tell
1474  // us what the next instruction is after it's done mucking about.
1475  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
1476  return true;
1477  }
1478  return false;
1479 }
1480 
1481 // Find loads and stores that can be merged into a single load or store pair
1482 // instruction.
1483 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
1484   MachineInstr &MI = *MBBI;
1485   MachineBasicBlock::iterator E = MI.getParent()->end();
1486 
1487  if (!TII->isCandidateToMergeOrPair(MI))
1488  return false;
1489 
1490  // Early exit if the offset is not possible to match. (6 bits of positive
1491  // range, plus allow an extra one in case we find a later insn that matches
1492  // with Offset-1)
1493  bool IsUnscaled = TII->isUnscaledLdSt(MI);
1494  int Offset = getLdStOffsetOp(MI).getImm();
1495  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
1496  // Allow one more for offset.
1497  if (Offset > 0)
1498  Offset -= OffsetStride;
1499  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
1500  return false;
1501 
1502  // Look ahead up to LdStLimit instructions for a pairable instruction.
1503   LdStPairFlags Flags;
1504   MachineBasicBlock::iterator Paired =
1505       findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
1506  if (Paired != E) {
1507  ++NumPairCreated;
1508  if (TII->isUnscaledLdSt(MI))
1509  ++NumUnscaledPairCreated;
1510  // Keeping the iterator straight is a pain, so we let the merge routine tell
1511  // us what the next instruction is after it's done mucking about.
1512  MBBI = mergePairedInsns(MBBI, Paired, Flags);
1513  return true;
1514  }
1515  return false;
1516 }
1517 
1518 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
1519  bool EnableNarrowZeroStOpt) {
1520  bool Modified = false;
1521   // Four transformations to do here:
1522  // 1) Find loads that directly read from stores and promote them by
1523  // replacing with mov instructions. If the store is wider than the load,
1524  // the load will be replaced with a bitfield extract.
1525  // e.g.,
1526  // str w1, [x0, #4]
1527  // ldrh w2, [x0, #6]
1528  // ; becomes
1529  // str w1, [x0, #4]
1530  // lsr w2, w1, #16
1531  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1532  MBBI != E;) {
1533  MachineInstr &MI = *MBBI;
1534  switch (MI.getOpcode()) {
1535  default:
1536  // Just move on to the next instruction.
1537  ++MBBI;
1538  break;
1539  // Scaled instructions.
1540  case AArch64::LDRBBui:
1541  case AArch64::LDRHHui:
1542  case AArch64::LDRWui:
1543  case AArch64::LDRXui:
1544  // Unscaled instructions.
1545  case AArch64::LDURBBi:
1546  case AArch64::LDURHHi:
1547  case AArch64::LDURWi:
1548  case AArch64::LDURXi: {
1549  if (tryToPromoteLoadFromStore(MBBI)) {
1550  Modified = true;
1551  break;
1552  }
1553  ++MBBI;
1554  break;
1555  }
1556  }
1557  }
1558  // 2) Merge adjacent zero stores into a wider store.
1559  // e.g.,
1560  // strh wzr, [x0]
1561  // strh wzr, [x0, #2]
1562  // ; becomes
1563  // str wzr, [x0]
1564  // e.g.,
1565  // str wzr, [x0]
1566  // str wzr, [x0, #4]
1567  // ; becomes
1568  // str xzr, [x0]
1569  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1570  EnableNarrowZeroStOpt && MBBI != E;) {
1571  if (isPromotableZeroStoreInst(*MBBI)) {
1572  if (tryToMergeZeroStInst(MBBI)) {
1573  Modified = true;
1574  } else
1575  ++MBBI;
1576  } else
1577  ++MBBI;
1578  }
1579 
1580  // 3) Find loads and stores that can be merged into a single load or store
1581  // pair instruction.
1582  // e.g.,
1583  // ldr x0, [x2]
1584  // ldr x1, [x2, #8]
1585  // ; becomes
1586  // ldp x0, x1, [x2]
1587  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1588  MBBI != E;) {
1589  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
1590  Modified = true;
1591  else
1592  ++MBBI;
1593  }
1594  // 4) Find base register updates that can be merged into the load or store
1595  // as a base-reg writeback.
1596  // e.g.,
1597  // ldr x0, [x2]
1598  // add x2, x2, #4
1599  // ; becomes
1600  // ldr x0, [x2], #4
1601  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1602  MBBI != E;) {
1603  MachineInstr &MI = *MBBI;
1604     // Do update merging. It's simpler to keep this separate from the above
1605     // switches, though not strictly necessary.
1606  unsigned Opc = MI.getOpcode();
1607  switch (Opc) {
1608  default:
1609  // Just move on to the next instruction.
1610  ++MBBI;
1611  break;
1612  // Scaled instructions.
1613  case AArch64::STRSui:
1614  case AArch64::STRDui:
1615  case AArch64::STRQui:
1616  case AArch64::STRXui:
1617  case AArch64::STRWui:
1618  case AArch64::STRHHui:
1619  case AArch64::STRBBui:
1620  case AArch64::LDRSui:
1621  case AArch64::LDRDui:
1622  case AArch64::LDRQui:
1623  case AArch64::LDRXui:
1624  case AArch64::LDRWui:
1625  case AArch64::LDRHHui:
1626  case AArch64::LDRBBui:
1627  // Unscaled instructions.
1628  case AArch64::STURSi:
1629  case AArch64::STURDi:
1630  case AArch64::STURQi:
1631  case AArch64::STURWi:
1632  case AArch64::STURXi:
1633  case AArch64::LDURSi:
1634  case AArch64::LDURDi:
1635  case AArch64::LDURQi:
1636  case AArch64::LDURWi:
1637  case AArch64::LDURXi:
1638  // Paired instructions.
1639  case AArch64::LDPSi:
1640  case AArch64::LDPSWi:
1641  case AArch64::LDPDi:
1642  case AArch64::LDPQi:
1643  case AArch64::LDPWi:
1644  case AArch64::LDPXi:
1645  case AArch64::STPSi:
1646  case AArch64::STPDi:
1647  case AArch64::STPQi:
1648  case AArch64::STPWi:
1649  case AArch64::STPXi: {
1650  // Make sure this is a reg+imm (as opposed to an address reloc).
1651  if (!getLdStOffsetOp(MI).isImm()) {
1652  ++MBBI;
1653  break;
1654  }
1655  // Look forward to try to form a post-index instruction. For example,
1656  // ldr x0, [x20]
1657  // add x20, x20, #32
1658  // merged into:
1659     // ldr x0, [x20], #32
1660     MachineBasicBlock::iterator Update =
1661         findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
1662  if (Update != E) {
1663  // Merge the update into the ld/st.
1664  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
1665  Modified = true;
1666  ++NumPostFolded;
1667  break;
1668  }
1669  // Don't know how to handle unscaled pre/post-index versions below, so
1670  // move to the next instruction.
1671  if (TII->isUnscaledLdSt(Opc)) {
1672  ++MBBI;
1673  break;
1674  }
1675 
1676  // Look back to try to find a pre-index instruction. For example,
1677  // add x0, x0, #8
1678  // ldr x1, [x0]
1679  // merged into:
1680  // ldr x1, [x0, #8]!
1681  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
1682  if (Update != E) {
1683  // Merge the update into the ld/st.
1684  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
1685  Modified = true;
1686  ++NumPreFolded;
1687  break;
1688  }
1689  // The immediate in the load/store is scaled by the size of the memory
1690  // operation. The immediate in the add we're looking for,
1691  // however, is not, so adjust here.
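1692  // For example, an LDRXui with immediate 8 addresses [base, #64] (8 * 8
1692  // bytes), so only an add/sub that updates the base by 64 can be folded.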
1692  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
1693 
1694  // Look forward to try to find a pre-index instruction. For example,
1695  // ldr x1, [x0, #64]
1696  // add x0, x0, #64
1697  // merged into:
1698  // ldr x1, [x0, #64]!
1699  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
1700  if (Update != E) {
1701  // Merge the update into the ld/st.
1702  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
1703  Modified = true;
1704  ++NumPreFolded;
1705  break;
1706  }
1707 
1708  // Nothing found. Just move to the next instruction.
1709  ++MBBI;
1710  break;
1711  }
1712  }
1713  }
1714 
1715  return Modified;
1716 }
1717 
1718 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1719  if (skipFunction(*Fn.getFunction()))
1720  return false;
1721 
1722  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
1723  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
1724  TRI = Subtarget->getRegisterInfo();
1725 
1726  // Resize the modified and used register bitfield trackers. We do this once
1727  // per function and then clear the bitfield each time we optimize a load or
1728  // store.
1729  ModifiedRegs.resize(TRI->getNumRegs());
1730  UsedRegs.resize(TRI->getNumRegs());
1731 
1732  bool Modified = false;
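  // Merging narrow zero stores can widen an access beyond its original
  // alignment, so that phase is disabled when the subtarget requires strictly
  // aligned memory accesses.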
1733  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
1734  for (auto &MBB : Fn)
1735  Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
1736 
1737  return Modified;
1738 }
1739 
1740 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
1741 // stores near one another? Note: The pre-RA instruction scheduler already has
1742 // hooks to try to schedule pairable loads/stores together to improve pairing
1743 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
1744 
1745 // FIXME: When pairing store instructions it's very possible for this pass to
1746 // hoist a store with a KILL marker above another use (without a KILL marker).
1747 // The resulting IR is invalid, but nothing uses the KILL markers after this
1748 // pass, so it's never caused a problem in practice.
1749 
1750 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
1751 /// load / store optimization pass.
1752 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
1753  return new AArch64LoadStoreOpt();
1754 }
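
createAArch64LoadStoreOptimizationPass is the only entry point other code needs. As a rough, hedged illustration, a client driving the legacy pass manager could schedule the pass as sketched below; the helper function is hypothetical, and the real AArch64 backend instead adds the pass from its own pass configuration after register allocation.

 // Sketch only: assumes the in-tree target header "AArch64.h" (which declares
 // the factory) is on the include path and that register allocation has
 // already run, as this pass requires.
 #include "AArch64.h"
 #include "llvm/IR/LegacyPassManager.h"

 static void addAArch64LoadStoreOpt(llvm::legacy::PassManagerBase &PM) {
   // Create the MachineFunction pass and hand ownership to the pass manager.
   PM.add(llvm::createAArch64LoadStoreOptimizationPass());
 }

When experimenting through llc, the hidden cl::opt knobs aarch64-load-store-scan-limit and aarch64-update-scan-limit can be passed on the command line to change how far the pass scans for pairable instructions and base-register updates, respectively.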