//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter where we emit the CFI
// instructions. In order to preserve the correctness of the unwind information,
// the pass should not change the order of any two instructions, one of which
// has the FrameSetup/FrameDestroy flag, or, alternatively, should apply an
// ad-hoc fix to the unwind information.
//
//===----------------------------------------------------------------------===//
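
// Illustrative examples (added commentary, not from the original source) of
// the peepholes this pass performs, in AArch64 assembly:
//
//   Pairing:                 Narrow zero-store widening:
//     ldr x0, [x2]             str wzr, [x0]
//     ldr x1, [x2, #8]         str wzr, [x0, #4]
//     =>                       =>
//     ldp x0, x1, [x2]         str xzr, [x0]
//
//   Pre-index folding:       Post-index folding:
//     add x1, x1, #8           ldr x0, [x1]
//     ldr x0, [x1]             add x1, x1, #8
//     =>                       =>
//     ldr x0, [x1, #8]!        ldr x0, [x1], #8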

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pair instructions generated from unscaled ld/st");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we form
// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  Optional<MCPhysReg> RenameReg = None;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = None; }
  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
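
// Illustrative sketch (added commentary): when pairing
//   str w0, [x2]       // I, the first instruction
//   ...
//   str w1, [x2, #4]   // the matching instruction
// MergeForward == false places the new STP at the position of I, while
// MergeForward == true removes I and places the STP at the position of the
// matching instruction.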

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two instructions indicated into a single wider store instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set the memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
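
// For example (added commentary), folding a stack adjustment into a
// pre-indexed store pair rewrites
//   sub sp, sp, #16
//   stp x0, x1, [sp]          ; STPXi
// into
//   stp x0, x1, [sp, #-16]!   ; STPXpre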
479 
480 static unsigned getPostIndexedOpcode(unsigned Opc) {
481  switch (Opc) {
482  default:
483  llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
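
// For example (added commentary), the corresponding post-indexed rewrite is
//   ldp x0, x1, [sp]          ; LDPXi
//   add sp, sp, #16
// into
//   ldp x0, x1, [sp], #16     ; LDPXpost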

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}

// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
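
// Worked example (added commentary): for LDPXi, Scale == 8 and the writeback
// immediate is the scaled imm7 field, so representable byte offsets are
// -64 * 8 .. 63 * 8, i.e. -512..504 in steps of 8. For an unpaired LDRXui,
// the pre/post-indexed forms use the unscaled imm9 byte range -256..255.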

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}
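
// Operand layout assumed above (added commentary): plain forms such as STRXui
// are (Rt, Rn, imm), so Rt is operand 0; pre-indexed forms such as STRXpre
// carry the base-register writeback def as operand 0 (Rn_wb, Rt, Rn, imm),
// so Rt moves to operand 1, which is why PairedRegOp is shifted by one for
// pre-indexed instructions.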

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
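
// For example (added commentary): an 8-byte  str x0, [x1]  covers byte
// offsets [0, 8), so the 4-byte  ldr w2, [x1, #4]  (bytes [4, 8)) is in
// range of the store, while  ldr w2, [x1, #8]  is not.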

static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}

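// Illustrative example (added commentary) of the transformation performed by
// mergeNarrowZeroStores below:
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
//   =>
//   str  wzr, [x0]      ; getMatchingWideOpcode(STRHHui) == STRWui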
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI;
  if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from small to large type.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit iterations.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineBasicBlock::iterator>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1  ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));

  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);

  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}

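// Illustrative examples (added commentary) of the load-from-store promotion
// performed by promoteLoadFromStore below (little-endian):
//   str w0, [x1]              str x0, [x1]
//   ldr w2, [x1]              ldr w2, [x1, #4]
//   =>                        =>
//   str w0, [x1]              str x0, [x1]
//   mov w2, w0  ; ORRWrs      lsr x2, x0, #32  ; UBFMXri x2, x0, 32, 63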
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if the destination register of the load is the same
    // register as the one holding the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0)               // imms
          ;

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
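
// For example (added commentary): two 8-byte loads at element offsets #64 and
// #65 cannot be paired, because the lower offset 64 does not fit the signed
// 7-bit element offset range -64..63 of LDP/STP (byte offsets -512..504 for
// 8-byte accesses).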

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
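
// E.g. (added commentary) alignTo(5, 4) == 8 and alignTo(-3, 4) == 0; the
// unsigned MathExtras variant would require a cast for negative offsets.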

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
      return true;

  return false;
}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also we can't handle stores without an immediate offset operand,
    // while the operand might be the address for a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X>pre - LDR<S,D,Q,W,X>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
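
// Illustrative example (added commentary) of the sext/zext matching above:
//   ldrsw x0, [x2]       ; LDRSWui, non-sext equivalent is LDRWui
//   ldr   w1, [x2, #4]   ; LDRWui
// are candidates: they can be paired into an LDP of two W registers, with a
// separate sign extension of the first result (see the SExtIdx handling in
// mergePairedInsns).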

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.
  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }
  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed, potentially
      // impacting other instructions we did not check. Bail out.
      // Note that this relies on the structure of the AArch64 register file.
      // In particular, a subregister cannot be written without overwriting
      // the whole register.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
                        << MI << ")\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //   1. Insert an extra copy, to materialize the def.
      //   2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                          << "\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
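
// Illustrative example (added commentary) of store pairing enabled by
// renaming: in
//   mov w8, #42
//   str w8, [x0]
//   mov w8, w2         ; redefines w8 between the two stores
//   str w8, [x0, #4]
// the first store cannot simply be moved next to the second, but after
// renaming w8 to an unused register (say w9) in the first mov/str, the
// stores can be merged forward into  stp w9, w8, [x0].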

// Check if we can find a physical register for renaming \p Reg. This register
// must:
// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
//   defined registers up to the point where the renamed register will be used,
// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
//   registers in the range the rename register will be used,
// * be available in all used register classes (checked using RequiredClasses).
static Optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return None;
}
1504 
1505 /// Scan the instructions looking for a load/store that can be combined with the
1506 /// current instruction into a wider equivalent or a load/store pair.
1508 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1509  LdStPairFlags &Flags, unsigned Limit,
1510  bool FindNarrowMerge) {
1511  MachineBasicBlock::iterator E = I->getParent()->end();
1513  MachineBasicBlock::iterator MBBIWithRenameReg;
1514  MachineInstr &FirstMI = *I;
1515  MBBI = next_nodbg(MBBI, E);
1516 
1517  bool MayLoad = FirstMI.mayLoad();
1518  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1519  Register Reg = getLdStRegOp(FirstMI).getReg();
1520  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1522  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1523  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1524 
1525  Optional<bool> MaybeCanRename = None;
1526  if (!EnableRenaming)
1527  MaybeCanRename = {false};
1528 
1530  LiveRegUnits UsedInBetween;
1531  UsedInBetween.init(*TRI);
1532 
1533  Flags.clearRenameReg();
1534 
1535  // Track which register units have been modified and used between the first
1536  // insn (inclusive) and the second insn.
1537  ModifiedRegUnits.clear();
1538  UsedRegUnits.clear();
1539 
1540  // Remember any instructions that read/write memory between FirstMI and MI.
1542 
1543  for (unsigned Count = 0; MBBI != E && Count < Limit;
1544  MBBI = next_nodbg(MBBI, E)) {
1545  MachineInstr &MI = *MBBI;
1546 
1547  UsedInBetween.accumulate(MI);
1548 
1549  // Don't count transient instructions towards the search limit since there
1550  // may be different numbers of them if e.g. debug information is present.
1551  if (!MI.isTransient())
1552  ++Count;
1553 
1554  Flags.setSExtIdx(-1);
1555  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1557  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1558  // If we've found another instruction with the same opcode, check to see
1559  // if the base and offset are compatible with our starting instruction.
1560  // These instructions all have scaled immediate operands, so we just
1561  // check for +1/-1. Make sure to check the new instruction offset is
1562  // actually an immediate and not a symbolic reference destined for
1563  // a relocation.
1565  int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
1566  bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
1567  if (IsUnscaled != MIIsUnscaled) {
1568  // We're trying to pair instructions that differ in how they are scaled.
1569  // If FirstMI is scaled then scale the offset of MI accordingly.
1570  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1571  int MemSize = TII->getMemScale(MI);
1572  if (MIIsUnscaled) {
1573  // If the unscaled offset isn't a multiple of the MemSize, we can't
1574  // pair the operations together: bail and keep looking.
1575  if (MIOffset % MemSize) {
1576  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1577  UsedRegUnits, TRI);
1578  MemInsns.push_back(&MI);
1579  continue;
1580  }
1581  MIOffset /= MemSize;
1582  } else {
1583  MIOffset *= MemSize;
1584  }
1585  }
1586 
1587  bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
1588 
1589  if (BaseReg == MIBaseReg) {
1590  // If the offset of the second ld/st is not equal to the size of the
1591  // destination register it can’t be paired with a pre-index ld/st
1592  // pair. Additionally if the base reg is used or modified the operations
1593  // can't be paired: bail and keep looking.
1594  if (IsPreLdSt) {
1595  bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1596  bool IsBaseRegUsed = !UsedRegUnits.available(
1598  bool IsBaseRegModified = !ModifiedRegUnits.available(
1600  // If the stored value and the address of the second instruction is
1601  // the same, it needs to be using the updated register and therefore
1602  // it must not be folded.
1603  bool IsMIRegTheSame =
1606  if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
1607  IsMIRegTheSame) {
1608  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1609  UsedRegUnits, TRI);
1610  MemInsns.push_back(&MI);
1611  continue;
1612  }
1613  } else {
1614  if ((Offset != MIOffset + OffsetStride) &&
1615  (Offset + OffsetStride != MIOffset)) {
1616  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1617  UsedRegUnits, TRI);
1618  MemInsns.push_back(&MI);
1619  continue;
1620  }
1621  }
1622 
1623  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1624  if (FindNarrowMerge) {
1625  // If the alignment requirements of the scaled wide load/store
1626  // instruction can't express the offset of the scaled narrow input,
1627  // bail and keep looking. For promotable zero stores, allow only when
1628  // the stored value is the same (i.e., WZR).
1629  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1630  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1631  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1632  UsedRegUnits, TRI);
1633  MemInsns.push_back(&MI);
1634  continue;
1635  }
1636  } else {
1637  // Pairwise instructions have a 7-bit signed offset field. Single
1638  // insns have a 12-bit unsigned offset field. If the resultant
1639  // immediate offset of merging these instructions is out of range for
1640  // a pairwise instruction, bail and keep looking.
1641  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1642  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1643  UsedRegUnits, TRI);
1644  MemInsns.push_back(&MI);
1645  continue;
1646  }
1647  // If the alignment requirements of the paired (scaled) instruction
1648  // can't express the offset of the unscaled input, bail and keep
1649  // looking.
1650  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1651  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1652  UsedRegUnits, TRI);
1653  MemInsns.push_back(&MI);
1654  continue;
1655  }
1656  }
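 // A worked example of the pair range check above (illustrative values, not
 // from the source): merging scaled "ldr x0, [x1, #0]" with "ldr x2, [x1, #8]"
 // gives MinOffset 0, which fits the 7-bit signed pair field, while merging
 // "ldr x0, [x1, #1024]" (scaled offset 128) with a neighbour at offset 129 is
 // rejected because 128 exceeds the pair maximum of 63.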
1657  // If the destination register of one load is the same register or a
1658  // sub/super register of the other load, bail and keep looking. A
1659  // load-pair instruction with both destination registers the same is
1660  // UNPREDICTABLE and will result in an exception.
1661  if (MayLoad &&
 1662  TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
 1663  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1664  TRI);
1665  MemInsns.push_back(&MI);
1666  continue;
1667  }
1668 
1669  // If the BaseReg has been modified, then we cannot do the optimization.
1670  // For example, in the following pattern
 1671  // ldr x1, [x2]
 1672  // ldr x2, [x3]
 1673  // ldr x4, [x2, #8],
1674  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1675  if (!ModifiedRegUnits.available(BaseReg))
1676  return E;
1677 
1678  // If the Rt of the second instruction was not modified or used between
1679  // the two instructions and none of the instructions between the second
1680  // and first alias with the second, we can combine the second into the
1681  // first.
1682  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1683  !(MI.mayLoad() &&
1684  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1685  !mayAlias(MI, MemInsns, AA)) {
1686 
1687  Flags.setMergeForward(false);
1688  Flags.clearRenameReg();
1689  return MBBI;
1690  }
1691 
1692  // Likewise, if the Rt of the first instruction is not modified or used
1693  // between the two instructions and none of the instructions between the
1694  // first and the second alias with the first, we can combine the first
1695  // into the second.
1696  if (!(MayLoad &&
1697  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1698  !mayAlias(FirstMI, MemInsns, AA)) {
1699 
1700  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1701  Flags.setMergeForward(true);
1702  Flags.clearRenameReg();
1703  return MBBI;
1704  }
1705 
1706  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1707  if (!MaybeCanRename)
1708  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1709  RequiredClasses, TRI)};
1710 
1711  if (*MaybeCanRename) {
 1712  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
 1713  *FirstMI.getParent()->getParent(), Reg, DefinedInBB,
1714  UsedInBetween, RequiredClasses, TRI);
1715  if (MaybeRenameReg) {
1716  Flags.setRenameReg(*MaybeRenameReg);
1717  Flags.setMergeForward(true);
1718  MBBIWithRenameReg = MBBI;
1719  }
1720  }
1721  }
1722  }
1723  // Unable to combine these instructions due to interference in between.
1724  // Keep looking.
1725  }
1726  }
1727 
1728  if (Flags.getRenameReg())
1729  return MBBIWithRenameReg;
1730 
 1731  // The instruction wasn't a matching load or store. Stop searching if we
 1732  // encounter a call instruction that might modify memory.
1733  if (MI.isCall())
1734  return E;
1735 
 1736  // Update modified / used register units.
1737  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1738 
1739  // Otherwise, if the base register is modified, we have no match, so
1740  // return early.
1741  if (!ModifiedRegUnits.available(BaseReg))
1742  return E;
1743 
1744  // Update list of instructions that read/write memory.
1745  if (MI.mayLoadOrStore())
1746  MemInsns.push_back(&MI);
1747  }
1748  return E;
1749 }
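 // Illustrative renaming opportunity (hypothetical registers, simplified):
 //   str w0, [x2]
 //   mov w0, w8        ; redefines w0 between the two stores
 //   str w0, [x2, #4]
 // A forward merge is blocked because w0 no longer holds the first store's
 // value at the pair position; when a suitable spare register exists, the
 // renaming machinery above may rewrite the conflicting occurrences so an
 // stp can still be formed.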
1750 
 1751 static MachineBasicBlock::iterator
 1752 maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
 1753  auto End = MI.getParent()->end();
1754  if (MaybeCFI == End ||
1755  MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
1756  !(MI.getFlag(MachineInstr::FrameSetup) ||
1757  MI.getFlag(MachineInstr::FrameDestroy)) ||
1758  AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
1759  return End;
1760 
1761  const MachineFunction &MF = *MI.getParent()->getParent();
1762  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
1763  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
1764  switch (CFI.getOperation()) {
 1765  case MCCFIInstruction::OpDefCfa:
 1766  case MCCFIInstruction::OpDefCfaOffset:
 1767  return MaybeCFI;
1768  default:
1769  return End;
1770  }
1771 }
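 // For illustration (a simplified, assumed prologue shape, not from the
 // source): when the update being folded is a frame-setup "sub sp, sp, #16"
 // followed by ".cfi_def_cfa_offset 16", the routine above returns the CFI
 // instruction so the caller can splice it after the merged pre-indexed
 // memory instruction, keeping the unwind info in sync with the SP change.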
1772 
 1773 MachineBasicBlock::iterator
 1774 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
 1775  MachineBasicBlock::iterator Update,
 1776  bool IsPreIdx) {
1777  assert((Update->getOpcode() == AArch64::ADDXri ||
1778  Update->getOpcode() == AArch64::SUBXri) &&
1779  "Unexpected base register update instruction to merge!");
1780  MachineBasicBlock::iterator E = I->getParent()->end();
 1781  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
 1782 
 1783  // If updating the SP and the following instruction is a CFA offset related
 1784  // CFI instruction, move it after the merged instruction.
 1785  MachineBasicBlock::iterator CFI =
 1786  IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
1787 
1788  // Return the instruction following the merged instruction, which is
1789  // the instruction following our unmerged load. Unless that's the add/sub
1790  // instruction we're merging, in which case it's the one after that.
1791  if (NextI == Update)
1792  NextI = next_nodbg(NextI, E);
1793 
1794  int Value = Update->getOperand(2).getImm();
1795  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1796  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1797  if (Update->getOpcode() == AArch64::SUBXri)
1798  Value = -Value;
1799 
1800  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
 1801  : getPostIndexedOpcode(I->getOpcode());
 1802  MachineInstrBuilder MIB;
1803  int Scale, MinOffset, MaxOffset;
1804  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
 1805  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
 1806  // Non-paired instruction.
1807  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1808  .add(getLdStRegOp(*Update))
1809  .add(getLdStRegOp(*I))
 1810  .add(AArch64InstrInfo::getLdStBaseOp(*I))
 1811  .addImm(Value / Scale)
1812  .setMemRefs(I->memoperands())
1813  .setMIFlags(I->mergeFlagsWith(*Update));
1814  } else {
1815  // Paired instruction.
1816  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1817  .add(getLdStRegOp(*Update))
1818  .add(getLdStRegOp(*I, 0))
1819  .add(getLdStRegOp(*I, 1))
 1820  .add(AArch64InstrInfo::getLdStBaseOp(*I))
 1821  .addImm(Value / Scale)
1822  .setMemRefs(I->memoperands())
1823  .setMIFlags(I->mergeFlagsWith(*Update));
1824  }
1825  if (CFI != E) {
1826  MachineBasicBlock *MBB = I->getParent();
1827  MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
1828  }
1829 
1830  if (IsPreIdx) {
1831  ++NumPreFolded;
1832  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1833  } else {
1834  ++NumPostFolded;
1835  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1836  }
1837  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1838  LLVM_DEBUG(I->print(dbgs()));
1839  LLVM_DEBUG(dbgs() << " ");
1840  LLVM_DEBUG(Update->print(dbgs()));
1841  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1842  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1843  LLVM_DEBUG(dbgs() << "\n");
1844 
1845  // Erase the old instructions for the block.
1846  I->eraseFromParent();
1847  Update->eraseFromParent();
1848 
1849  return NextI;
1850 }
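 // Worked example of the Value / Scale arithmetic above (illustrative
 // operands, not from the source): folding
 //   ldp x0, x1, [sp]
 //   add sp, sp, #32        ; Value = 32
 // uses the paired X-register scale of 8, so the builder emits the scaled
 // immediate 32 / 8 = 4, which the asm printer renders as
 //   ldp x0, x1, [sp], #32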
1851 
1852 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1853  MachineInstr &MI,
1854  unsigned BaseReg, int Offset) {
1855  switch (MI.getOpcode()) {
1856  default:
1857  break;
1858  case AArch64::SUBXri:
1859  case AArch64::ADDXri:
1860  // Make sure it's a vanilla immediate operand, not a relocation or
1861  // anything else we can't handle.
1862  if (!MI.getOperand(2).isImm())
1863  break;
1864  // Watch out for 1 << 12 shifted value.
1865  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1866  break;
1867 
1868  // The update instruction source and destination register must be the
1869  // same as the load/store base register.
1870  if (MI.getOperand(0).getReg() != BaseReg ||
1871  MI.getOperand(1).getReg() != BaseReg)
1872  break;
1873 
1874  int UpdateOffset = MI.getOperand(2).getImm();
1875  if (MI.getOpcode() == AArch64::SUBXri)
1876  UpdateOffset = -UpdateOffset;
1877 
1878  // The immediate must be a multiple of the scaling factor of the pre/post
1879  // indexed instruction.
1880  int Scale, MinOffset, MaxOffset;
1881  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1882  if (UpdateOffset % Scale != 0)
1883  break;
1884 
1885  // Scaled offset must fit in the instruction immediate.
1886  int ScaledOffset = UpdateOffset / Scale;
1887  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1888  break;
1889 
1890  // If we have a non-zero Offset, we check that it matches the amount
1891  // we're adding to the register.
1892  if (!Offset || Offset == UpdateOffset)
1893  return true;
1894  break;
1895  }
1896  return false;
1897 }
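 // Example of the checks above (illustrative): for "ldr x1, [x0]" the
 // pre/post-indexed form uses Scale = 1 with a signed 9-bit immediate range,
 // so "add x0, x0, #8" (UpdateOffset = 8) matches, while "add x0, x0, #512"
 // is rejected because 512 exceeds MaxOffset.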
1898 
1899 static bool needsWinCFI(const MachineFunction *MF) {
1900  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
 1901  MF->getFunction().needsUnwindTableEntry();
 1902 }
1903 
1904 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1905  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1906  MachineBasicBlock::iterator E = I->getParent()->end();
1907  MachineInstr &MemMI = *I;
 1908  MachineBasicBlock::iterator MBBI = I;
 1909 
1910  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1911  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
1912  TII->getMemScale(MemMI);
1913 
1914  // Scan forward looking for post-index opportunities. Updating instructions
1915  // can't be formed if the memory instruction doesn't have the offset we're
1916  // looking for.
1917  if (MIUnscaledOffset != UnscaledOffset)
1918  return E;
1919 
1920  // If the base register overlaps a source/destination register, we can't
 1921  // merge the update. This does not apply to tag store instructions, which
 1922  // ignore the address part of the source register.
 1923  // Nor does it apply to STGPi, which, unlike normal stores, has no
 1924  // unpredictable behavior in this case and always performs the writeback
 1925  // after reading the source register value.
1926  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1927  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
1928  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1929  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1930  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1931  return E;
1932  }
1933  }
1934 
1935  // Track which register units have been modified and used between the first
1936  // insn (inclusive) and the second insn.
1937  ModifiedRegUnits.clear();
1938  UsedRegUnits.clear();
1939  MBBI = next_nodbg(MBBI, E);
1940 
1941  // We can't post-increment the stack pointer if any instruction between
1942  // the memory access (I) and the increment (MBBI) can access the memory
1943  // region defined by [SP, MBBI].
1944  const bool BaseRegSP = BaseReg == AArch64::SP;
1945  if (BaseRegSP && needsWinCFI(I->getMF())) {
 1946  // FIXME: For now, we always block the optimization over SP in Windows
 1947  // targets as it requires adjusting the unwind/debug info; messing up
 1948  // the unwind info can actually cause a miscompile.
1949  return E;
1950  }
1951 
1952  for (unsigned Count = 0; MBBI != E && Count < Limit;
1953  MBBI = next_nodbg(MBBI, E)) {
1954  MachineInstr &MI = *MBBI;
1955 
1956  // Don't count transient instructions towards the search limit since there
1957  // may be different numbers of them if e.g. debug information is present.
1958  if (!MI.isTransient())
1959  ++Count;
1960 
1961  // If we found a match, return it.
1962  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1963  return MBBI;
1964 
1965  // Update the status of what the instruction clobbered and used.
1966  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1967 
1968  // Otherwise, if the base register is used or modified, we have no match, so
1969  // return early.
1970  // If we are optimizing SP, do not allow instructions that may load or store
1971  // in between the load and the optimized value update.
1972  if (!ModifiedRegUnits.available(BaseReg) ||
1973  !UsedRegUnits.available(BaseReg) ||
1974  (BaseRegSP && MBBI->mayLoadOrStore()))
1975  return E;
1976  }
1977  return E;
1978 }
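 // Illustrative bail-out for the SP case above (hypothetical registers):
 //   ldr x0, [sp]
 //   str x1, [x2]          ; may touch the stack region being freed
 //   add sp, sp, #16
 // Folding the add into "ldr x0, [sp], #16" would release [sp, sp+16) before
 // the store executes, so any intervening load/store blocks the fold when
 // the base register is SP.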
1979 
1980 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1981  MachineBasicBlock::iterator I, unsigned Limit) {
1982  MachineBasicBlock::iterator B = I->getParent()->begin();
1983  MachineBasicBlock::iterator E = I->getParent()->end();
1984  MachineInstr &MemMI = *I;
 1985  MachineBasicBlock::iterator MBBI = I;
 1986  MachineFunction &MF = *MemMI.getMF();
1987 
1988  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
 1989  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
 1990 
1991  // If the load/store is the first instruction in the block, there's obviously
1992  // not any matching update. Ditto if the memory offset isn't zero.
1993  if (MBBI == B || Offset != 0)
1994  return E;
1995  // If the base register overlaps a destination register, we can't
1996  // merge the update.
1997  if (!isTagStore(MemMI)) {
1998  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
1999  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2000  Register DestReg = getLdStRegOp(MemMI, i).getReg();
2001  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2002  return E;
2003  }
2004  }
2005 
2006  const bool BaseRegSP = BaseReg == AArch64::SP;
2007  if (BaseRegSP && needsWinCFI(I->getMF())) {
 2008  // FIXME: For now, we always block the optimization over SP in Windows
 2009  // targets as it requires adjusting the unwind/debug info; messing up
 2010  // the unwind info can actually cause a miscompile.
2011  return E;
2012  }
2013 
2014  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2015  unsigned RedZoneSize =
2016  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2017 
2018  // Track which register units have been modified and used between the first
2019  // insn (inclusive) and the second insn.
2020  ModifiedRegUnits.clear();
2021  UsedRegUnits.clear();
2022  unsigned Count = 0;
 2023  bool MemAccessBeforeSPPreInc = false;
2024  do {
2025  MBBI = prev_nodbg(MBBI, B);
2026  MachineInstr &MI = *MBBI;
2027 
2028  // Don't count transient instructions towards the search limit since there
2029  // may be different numbers of them if e.g. debug information is present.
2030  if (!MI.isTransient())
2031  ++Count;
2032 
2033  // If we found a match, return it.
2034  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2035  // Check that the update value is within our red zone limit (which may be
2036  // zero).
 2037  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2038  return E;
2039  return MBBI;
2040  }
2041 
2042  // Update the status of what the instruction clobbered and used.
2043  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2044 
2045  // Otherwise, if the base register is used or modified, we have no match, so
2046  // return early.
2047  if (!ModifiedRegUnits.available(BaseReg) ||
2048  !UsedRegUnits.available(BaseReg))
2049  return E;
 2050  // Keep track of whether we have a memory access before an SP
 2051  // pre-increment; in that case we need to validate later that the update
 2052  // amount respects the red zone.
 2053  if (BaseRegSP && MBBI->mayLoadOrStore())
 2054  MemAccessBeforeSPPreInc = true;
2055  } while (MBBI != B && Count < Limit);
2056  return E;
2057 }
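 // Illustrative red-zone interaction (hypothetical registers):
 //   sub sp, sp, #64
 //   str x0, [x2]          ; memory access between the update and MemMI
 //   ldr x1, [sp]
 // Folding to "ldr x1, [sp, #-64]!" delays the stack adjustment past the
 // store, so the merge is only allowed when the 64-byte amount fits in the
 // target's red zone (which may be zero).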
2058 
2059 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
 2060  MachineBasicBlock::iterator &MBBI) {
 2061  MachineInstr &MI = *MBBI;
2062  // If this is a volatile load, don't mess with it.
2063  if (MI.hasOrderedMemoryRef())
2064  return false;
2065 
2066  // Make sure this is a reg+imm.
2067  // FIXME: It is possible to extend it to handle reg+reg cases.
 2068  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
 2069  return false;
2070 
2071  // Look backward up to LdStLimit instructions.
 2072  MachineBasicBlock::iterator StoreI;
 2073  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2074  ++NumLoadsFromStoresPromoted;
2075  // Promote the load. Keeping the iterator straight is a
2076  // pain, so we let the merge routine tell us what the next instruction
2077  // is after it's done mucking about.
2078  MBBI = promoteLoadFromStore(MBBI, StoreI);
2079  return true;
2080  }
2081  return false;
2082 }
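 // Illustrative promotions performed by the routine above (assumed offsets,
 // not from the source): a load reading back the full stored value becomes a
 // register move,
 //   str x1, [x0, #8]
 //   ldr x2, [x0, #8]   ; becomes mov x2, x1
 // while a narrower load contained in the stored bytes becomes a bitfield
 // extract of the stored register, as in the str/ldrh example shown in
 // optimizeBlock below.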
2083 
2084 // Merge adjacent zero stores into a wider store.
2085 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
 2086  MachineBasicBlock::iterator &MBBI) {
 2087  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2088  MachineInstr &MI = *MBBI;
2089  MachineBasicBlock::iterator E = MI.getParent()->end();
2090 
2091  if (!TII->isCandidateToMergeOrPair(MI))
2092  return false;
2093 
2094  // Look ahead up to LdStLimit instructions for a mergable instruction.
2095  LdStPairFlags Flags;
2096  MachineBasicBlock::iterator MergeMI =
2097  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2098  if (MergeMI != E) {
2099  ++NumZeroStoresPromoted;
2100 
2101  // Keeping the iterator straight is a pain, so we let the merge routine tell
2102  // us what the next instruction is after it's done mucking about.
2103  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2104  return true;
2105  }
2106  return false;
2107 }
2108 
2109 // Find loads and stores that can be merged into a single load or store pair
2110 // instruction.
2111 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2112  MachineInstr &MI = *MBBI;
2113  MachineBasicBlock::iterator E = MI.getParent()->end();
2114 
2115  if (!TII->isCandidateToMergeOrPair(MI))
2116  return false;
2117 
2118  // Early exit if the offset is not possible to match. (6 bits of positive
2119  // range, plus allow an extra one in case we find a later insn that matches
2120  // with Offset-1)
2121  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
 2122  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
 2123  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2124  // Allow one more for offset.
2125  if (Offset > 0)
2126  Offset -= OffsetStride;
2127  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2128  return false;
2129 
2130  // Look ahead up to LdStLimit instructions for a pairable instruction.
2131  LdStPairFlags Flags;
 2132  MachineBasicBlock::iterator Paired =
 2133  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2134  if (Paired != E) {
2135  ++NumPairCreated;
2136  if (TII->hasUnscaledLdStOffset(MI))
2137  ++NumUnscaledPairCreated;
2138  // Keeping the iterator straight is a pain, so we let the merge routine tell
2139  // us what the next instruction is after it's done mucking about.
2140  auto Prev = std::prev(MBBI);
2141  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2142  // Collect liveness info for instructions between Prev and the new position
2143  // MBBI.
2144  for (auto I = std::next(Prev); I != MBBI; I++)
2145  updateDefinedRegisters(*I, DefinedInBB, TRI);
2146 
2147  return true;
2148  }
2149  return false;
2150 }
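 // Worked example of the early exit above (illustrative): a scaled
 // "ldr x0, [x1, #32760]" carries the immediate operand 4095; even after
 // subtracting OffsetStride it is far outside the pair range of [-64, 63],
 // so the forward scan is skipped entirely.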
2151 
2152 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
 2153  (MachineBasicBlock::iterator &MBBI) {
 2154  MachineInstr &MI = *MBBI;
2155  MachineBasicBlock::iterator E = MI.getParent()->end();
 2156  MachineBasicBlock::iterator Update;
 2157 
2158  // Look forward to try to form a post-index instruction. For example,
2159  // ldr x0, [x20]
2160  // add x20, x20, #32
2161  // merged into:
2162  // ldr x0, [x20], #32
2163  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2164  if (Update != E) {
2165  // Merge the update into the ld/st.
2166  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2167  return true;
2168  }
2169 
2170  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2171  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2172  return false;
2173 
2174  // Look back to try to find a pre-index instruction. For example,
2175  // add x0, x0, #8
2176  // ldr x1, [x0]
2177  // merged into:
2178  // ldr x1, [x0, #8]!
2179  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2180  if (Update != E) {
2181  // Merge the update into the ld/st.
2182  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2183  return true;
2184  }
2185 
2186  // The immediate in the load/store is scaled by the size of the memory
2187  // operation. The immediate in the add we're looking for,
2188  // however, is not, so adjust here.
2189  int UnscaledOffset =
2190  AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2191 
2192  // Look forward to try to find a pre-index instruction. For example,
2193  // ldr x1, [x0, #64]
2194  // add x0, x0, #64
2195  // merged into:
2196  // ldr x1, [x0, #64]!
2197  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2198  if (Update != E) {
2199  // Merge the update into the ld/st.
2200  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2201  return true;
2202  }
2203 
2204  return false;
2205 }
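 // Worked example of the UnscaledOffset computation above (illustrative):
 // a scaled "ldr x1, [x0, #64]" stores the immediate operand 8; with a
 // memory scale of 8, UnscaledOffset = 8 * 8 = 64, matching the byte amount
 // of the "add x0, x0, #64" being folded.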
2206 
 2207 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
 2208  bool EnableNarrowZeroStOpt) {
2209 
2210  bool Modified = false;
 2211  // Four transformations to do here:
2212  // 1) Find loads that directly read from stores and promote them by
2213  // replacing with mov instructions. If the store is wider than the load,
2214  // the load will be replaced with a bitfield extract.
2215  // e.g.,
2216  // str w1, [x0, #4]
2217  // ldrh w2, [x0, #6]
2218  // ; becomes
2219  // str w1, [x0, #4]
2220  // lsr w2, w1, #16
 2221  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 2222  MBBI != E;) {
2223  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2224  Modified = true;
2225  else
2226  ++MBBI;
2227  }
2228  // 2) Merge adjacent zero stores into a wider store.
2229  // e.g.,
2230  // strh wzr, [x0]
2231  // strh wzr, [x0, #2]
2232  // ; becomes
2233  // str wzr, [x0]
2234  // e.g.,
2235  // str wzr, [x0]
2236  // str wzr, [x0, #4]
2237  // ; becomes
2238  // str xzr, [x0]
2239  if (EnableNarrowZeroStOpt)
 2240  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 2241  MBBI != E;) {
2242  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2243  Modified = true;
2244  else
2245  ++MBBI;
2246  }
2247  // 3) Find loads and stores that can be merged into a single load or store
2248  // pair instruction.
2249  // e.g.,
2250  // ldr x0, [x2]
2251  // ldr x1, [x2, #8]
2252  // ; becomes
2253  // ldp x0, x1, [x2]
2254 
2255  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2256  DefinedInBB.clear();
2257  DefinedInBB.addLiveIns(MBB);
2258  }
2259 
 2260  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 2261  MBBI != E;) {
2262  // Track currently live registers up to this point, to help with
2263  // searching for a rename register on demand.
2264  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2265  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2266  Modified = true;
2267  else
2268  ++MBBI;
2269  }
2270  // 4) Find base register updates that can be merged into the load or store
2271  // as a base-reg writeback.
2272  // e.g.,
2273  // ldr x0, [x2]
2274  // add x2, x2, #4
2275  // ; becomes
2276  // ldr x0, [x2], #4
 2277  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 2278  MBBI != E;) {
2279  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2280  Modified = true;
2281  else
2282  ++MBBI;
2283  }
2284 
2285  return Modified;
2286 }
2287 
2288 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2289  if (skipFunction(Fn.getFunction()))
2290  return false;
2291 
2292  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
2293  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2294  TRI = Subtarget->getRegisterInfo();
2295  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2296 
2297  // Resize the modified and used register unit trackers. We do this once
2298  // per function and then clear the register units each time we optimize a load
2299  // or store.
2300  ModifiedRegUnits.init(*TRI);
2301  UsedRegUnits.init(*TRI);
2302  DefinedInBB.init(*TRI);
2303 
2304  bool Modified = false;
2305  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2306  for (auto &MBB : Fn) {
2307  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2308  Modified |= M;
2309  }
2310 
2311  return Modified;
2312 }
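 // Note (an inference for illustration, not stated in the source): narrow
 // zero-store merging is gated on !requiresStrictAlign() because the widened
 // store (e.g. one str xzr replacing two str wzr) may be less aligned than
 // its access size, which strict-alignment targets would fault on.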
2313 
2314 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2315 // stores near one another? Note: The pre-RA instruction scheduler already has
2316 // hooks to try and schedule pairable loads/stores together to improve pairing
 2317  // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2318 
2319 // FIXME: When pairing store instructions it's very possible for this pass to
2320 // hoist a store with a KILL marker above another use (without a KILL marker).
2321 // The resulting IR is invalid, but nothing uses the KILL markers after this
2322 // pass, so it's never caused a problem in practice.
2323 
2324 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2325 /// load / store optimization pass.
2327  return new AArch64LoadStoreOpt();
2328 }