//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter where we emit the CFI
// instructions. In order to preserve the correctness of the unwind information,
// the pass should not change the order of any two instructions, one of which
// has the FrameSetup/FrameDestroy flag, or, alternatively, it should apply an
// ad-hoc fix to the unwind information.
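//
// For example (an illustrative sketch, not part of the original comment), the
// basic pairing transformation performed by this pass rewrites
//
//   ldr w0, [x2]
//   ldr w1, [x2, #4]
//
// into the single paired load
//
//   ldp w0, w1, [x2]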
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled operations");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we form
// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  Optional<MCPhysReg> RenameReg = None;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = None; }
  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two narrow-store instructions indicated into a single wider
  // store instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
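
// For example (an illustrative sketch, not from the original sources), the
// zero-store merging below uses these mappings to rewrite two adjacent
// narrow zero stores
//
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
//
// into the single wider zero store
//
//   str wzr, [x0]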

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
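
// For example (illustrative, not from the original sources), merging a base
// register increment into a store yields a pre-indexed access:
//
//   add x8, x8, #16
//   str w0, [x8]
//
// becomes
//
//   str w0, [x8, #16]!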

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
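
// Similarly (illustrative, not from the original sources), an increment that
// follows the access folds into a post-indexed form:
//
//   str w0, [x20]
//   add x20, x20, #4
//
// becomes
//
//   str w0, [x20], #4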

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}

// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
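
// E.g. (an illustrative reading of the values set above) for STPXi the scale
// is 8, so writeback offsets of -512..504 bytes are representable, while a
// single STRXpre/STRXpost takes an unscaled byte offset in -256..255.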

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
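
// E.g. (illustrative) a 4-byte load at offset #4 lies entirely within an
// 8-byte store at offset #0 from the same base register:
//
//   str x1, [x0]        ; writes bytes [0, 8)
//   ldr w2, [x0, #4]    ; reads  bytes [4, 8)  -> in range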

static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // MergeForward also determines from where we copy the base register operand,
  // so that we get flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI;
  if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Rescale the offset from the narrow store's units to the wider store's
  // units.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit instructions.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines from where we copy the base register operand,
  // so that we get flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre-indexed load/stores the Rt should be the one of the
  // pre-indexed instruction.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1     ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));

  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);

  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same as the stored
    // value's register.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | (Immr << 6)                 // immr
                                | (Imms << 0);                // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}
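
// For example (an illustrative sketch, not from the original sources), when
// the load and store have the same size the load becomes a register move:
//
//   str w1, [x0, #4]
//   ldr w2, [x0, #4]
//
// becomes
//
//   str w1, [x0, #4]
//   mov w2, w1          ; emitted as ORRWrs w2, wzr, w1
//
// A load that reads a strict subrange of the stored bytes is instead rewritten
// into an AND/UBFM bitfield extract of the stored register, as above.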

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
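
// E.g. (illustrative) an unscaled 8-byte access at byte offset #24 converts to
// element offset 3, which fits the signed 7-bit pair field; a byte offset of
// #20 with an 8-byte stride is rejected because it is not stride-aligned.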

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
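
// E.g. alignTo(5, 4) == 8 and alignTo(8, 4) == 8.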

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
      return true;

  return false;
}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also, we can't handle stores without an immediate offset operand, as
    // the operand might be the address of a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

static bool needsWinCFI(const MachineFunction *MF) {
  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         MF->getFunction().needsUnwindTableEntry();
}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
                                  MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergeable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X>pre - LDR<S,D,Q,W,X>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
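
// For example (illustrative, not from the original sources), a scaled and an
// unscaled access to adjacent addresses can still be paired:
//
//   ldr  x0, [x2]
//   ldur x1, [x2, #-8]
//
// becomes
//
//   ldp x1, x0, [x2, #-8]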

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.
  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }
  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed, potentially
      // impacting other instructions we did not check. Bail out.
      // Note that this relies on the structure of the AArch64 register file.
      // In particular, a subregister cannot be written without overwriting
      // the whole register.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for
  //   RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
                        << MI << ")\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //       1. Insert an extra copy, to materialize the def.
      //       2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                          << "\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}

// Check if we can find a physical register for renaming \p Reg. This register
// must:
// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
//   defined registers up to the point where the renamed register will be used,
// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
//   registers in the range the rename register will be used,
// * be available in all used register classes (checked using RequiredClasses).
static Optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return None;
}

/// Scan the instructions looking for a load/store that can be combined with the
/// current instruction into a wider equivalent or a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are scaled.
        // If FirstMI is scaled then scale the offset of MI accordingly.
        // Otherwise, do the opposite (i.e., make MI's offset unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is not equal to the size of the
        // destination register it can't be paired with a pre-index ld/st
        // pair. Additionally, if the base reg is used or modified the
        // operations can't be paired: bail and keep looking.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the base address of the second instruction
          // are the same, the second instruction needs to use the updated
          // register and therefore it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }
1632 
1633  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1634  if (FindNarrowMerge) {
1635  // If the alignment requirements of the scaled wide load/store
1636  // instruction can't express the offset of the scaled narrow input,
1637  // bail and keep looking. For promotable zero stores, allow only when
1638  // the stored value is the same (i.e., WZR).
1639  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1640  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1641  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1642  UsedRegUnits, TRI);
1643  MemInsns.push_back(&MI);
1644  continue;
1645  }
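  // Illustrative example (hypothetical operands): "strh wzr, [x0, #2]"
  // and "strh wzr, [x0, #4]" have scaled offsets 1 and 2. MinOffset 1 is
  // odd, and byte offset 2 is not a multiple of the wider store's 4-byte
  // scale, so the pair cannot become a single "str wzr".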
1646  } else {
1647  // Pairwise instructions have a 7-bit signed offset field. Single
1648  // insns have a 12-bit unsigned offset field. If the resultant
1649  // immediate offset of merging these instructions is out of range for
1650  // a pairwise instruction, bail and keep looking.
1651  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1652  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1653  UsedRegUnits, TRI);
1654  MemInsns.push_back(&MI);
1655  continue;
1656  }
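  // Illustrative numbers (editorial sketch): for 8-byte accesses the
  // pair's signed 7-bit field covers [-64, 63] x 8 = [-512, 504] bytes,
  // while a single ldr's 12-bit unsigned field reaches 4095 x 8 bytes,
  // so two individually valid loads can still be unpairable.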
1657  // If the alignment requirements of the paired (scaled) instruction
1658  // can't express the offset of the unscaled input, bail and keep
1659  // looking.
1660  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1661  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1662  UsedRegUnits, TRI);
1663  MemInsns.push_back(&MI);
1664  continue;
1665  }
1666  }
1667  // If the destination register of one load is the same register or a
1668  // sub/super register of the other load, bail and keep looking. A
1669  // load-pair instruction with both destination registers the same is
1670  // UNPREDICTABLE and will result in an exception.
1671  if (MayLoad &&
1672  TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
1673  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1674  TRI);
1675  MemInsns.push_back(&MI);
1676  continue;
1677  }
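  // Illustrative example (hypothetical operands): merging "ldr x0, [x1]"
  // with "ldr x0, [x1, #8]" would form "ldp x0, x0, [x1]", which the
  // architecture treats as UNPREDICTABLE.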
1678 
1679  // If the BaseReg has been modified, then we cannot do the optimization.
1680  // For example, in the following pattern
1681  // ldr x1 [x2]
1682  // ldr x2 [x3]
1683  // ldr x4 [x2, #8],
1684  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1685  if (!ModifiedRegUnits.available(BaseReg))
1686  return E;
1687 
1688  // If the Rt of the second instruction was not modified or used between
1689  // the two instructions and none of the instructions between the second
1690  // and first alias with the second, we can combine the second into the
1691  // first.
1692  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1693  !(MI.mayLoad() &&
1694  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1695  !mayAlias(MI, MemInsns, AA)) {
1696 
1697  Flags.setMergeForward(false);
1698  Flags.clearRenameReg();
1699  return MBBI;
1700  }
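  // Illustrative example (hypothetical operands): for "ldr x0, [x2]"
  // followed later by "ldr x1, [x2, #8]", with x1 untouched and no
  // aliasing stores in between, "ldp x0, x1, [x2]" replaces the pair at
  // the position of the first load.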
1701 
1702  // Likewise, if the Rt of the first instruction is not modified or used
1703  // between the two instructions and none of the instructions between the
1704  // first and the second alias with the first, we can combine the first
1705  // into the second.
1706  if (!(MayLoad &&
1707  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1708  !mayAlias(FirstMI, MemInsns, AA)) {
1709 
1710  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1711  Flags.setMergeForward(true);
1712  Flags.clearRenameReg();
1713  return MBBI;
1714  }
1715 
1716  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1717  if (!MaybeCanRename)
1718  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1719  RequiredClasses, TRI)};
1720 
1721  if (*MaybeCanRename) {
1722  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
1723  *FirstMI.getParent()->getParent(), Reg, DefinedInBB,
1724  UsedInBetween, RequiredClasses, TRI);
1725  if (MaybeRenameReg) {
1726  Flags.setRenameReg(*MaybeRenameReg);
1727  Flags.setMergeForward(true);
1728  MBBIWithRenameReg = MBBI;
1729  }
1730  }
1731  }
1732  }
1733  // Unable to combine these instructions due to interference in between.
1734  // Keep looking.
1735  }
1736  }
1737 
1738  if (Flags.getRenameReg())
1739  return MBBIWithRenameReg;
1740 
1741  // The instruction wasn't a matching load or store. Stop searching if we
1742  // encounter a call instruction that might modify memory.
1743  if (MI.isCall())
1744  return E;
1745 
1746  // Update modified / uses register units.
1747  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1748 
1749  // Otherwise, if the base register is modified, we have no match, so
1750  // return early.
1751  if (!ModifiedRegUnits.available(BaseReg))
1752  return E;
1753 
1754  // Update list of instructions that read/write memory.
1755  if (MI.mayLoadOrStore())
1756  MemInsns.push_back(&MI);
1757  }
1758  return E;
1759 }
1760 
1761 static MachineBasicBlock::iterator
1762 maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
1763  auto End = MI.getParent()->end();
1764  if (MaybeCFI == End ||
1765  MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
1766  !(MI.getFlag(MachineInstr::FrameSetup) ||
1767  MI.getFlag(MachineInstr::FrameDestroy)) ||
1768  AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
1769  return End;
1770 
1771  const MachineFunction &MF = *MI.getParent()->getParent();
1772  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
1773  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
1774  switch (CFI.getOperation()) {
1775  case MCCFIInstruction::OpDefCfa:
1776  case MCCFIInstruction::OpDefCfaOffset:
1777  return MaybeCFI;
1778  default:
1779  return End;
1780  }
1781 }
1782 
1783 MachineBasicBlock::iterator
1784 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1785  MachineBasicBlock::iterator Update,
1786  bool IsPreIdx) {
1787  assert((Update->getOpcode() == AArch64::ADDXri ||
1788  Update->getOpcode() == AArch64::SUBXri) &&
1789  "Unexpected base register update instruction to merge!");
1790  MachineBasicBlock::iterator E = I->getParent()->end();
1791  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1792 
1793  // If updating the SP and the following instruction is a CFA-offset-related
1794  // CFI instruction, move it after the merged instruction.
1795  MachineBasicBlock::iterator CFI =
1796  IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
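  // Illustrative example (hypothetical offsets): for the pre-index case
  //   sub sp, sp, #16
  //   .cfi_def_cfa_offset 16
  //   str x0, [sp]
  // the merged form becomes
  //   str x0, [sp, #-16]!
  //   .cfi_def_cfa_offset 16
  // with the CFI spliced after the new instruction.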
1797 
1798  // Return the instruction following the merged instruction, which is
1799  // the instruction following our unmerged load. Unless that's the add/sub
1800  // instruction we're merging, in which case it's the one after that.
1801  if (NextI == Update)
1802  NextI = next_nodbg(NextI, E);
1803 
1804  int Value = Update->getOperand(2).getImm();
1805  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1806  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1807  if (Update->getOpcode() == AArch64::SUBXri)
1808  Value = -Value;
1809 
1810  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1811  : getPostIndexedOpcode(I->getOpcode());
1812  MachineInstrBuilder MIB;
1813  int Scale, MinOffset, MaxOffset;
1814  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1815  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
1816  // Non-paired instruction.
1817  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1818  .add(getLdStRegOp(*Update))
1819  .add(getLdStRegOp(*I))
1820  .add(AArch64InstrInfo::getLdStBaseOp(*I))
1821  .addImm(Value / Scale)
1822  .setMemRefs(I->memoperands())
1823  .setMIFlags(I->mergeFlagsWith(*Update));
1824  } else {
1825  // Paired instruction.
1826  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1827  .add(getLdStRegOp(*Update))
1828  .add(getLdStRegOp(*I, 0))
1829  .add(getLdStRegOp(*I, 1))
1830  .add(AArch64InstrInfo::getLdStBaseOp(*I))
1831  .addImm(Value / Scale)
1832  .setMemRefs(I->memoperands())
1833  .setMIFlags(I->mergeFlagsWith(*Update));
1834  }
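  // Illustrative example (hypothetical operands): merging
  //   ldp x0, x1, [sp]
  //   add sp, sp, #32
  // in the post-index case produces "ldp x0, x1, [sp], #32".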
1835  if (CFI != E) {
1836  MachineBasicBlock *MBB = I->getParent();
1837  MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
1838  }
1839 
1840  if (IsPreIdx) {
1841  ++NumPreFolded;
1842  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1843  } else {
1844  ++NumPostFolded;
1845  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1846  }
1847  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1848  LLVM_DEBUG(I->print(dbgs()));
1849  LLVM_DEBUG(dbgs() << " ");
1850  LLVM_DEBUG(Update->print(dbgs()));
1851  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1852  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1853  LLVM_DEBUG(dbgs() << "\n");
1854 
1855  // Erase the old instructions for the block.
1856  I->eraseFromParent();
1857  Update->eraseFromParent();
1858 
1859  return NextI;
1860 }
1861 
1862 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1863  MachineInstr &MI,
1864  unsigned BaseReg, int Offset) {
1865  switch (MI.getOpcode()) {
1866  default:
1867  break;
1868  case AArch64::SUBXri:
1869  case AArch64::ADDXri:
1870  // Make sure it's a vanilla immediate operand, not a relocation or
1871  // anything else we can't handle.
1872  if (!MI.getOperand(2).isImm())
1873  break;
1874  // Watch out for 1 << 12 shifted value.
1875  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1876  break;
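  // Illustrative example: "add x0, x0, #1, lsl #12" adds 4096, but the
  // shifted immediate cannot be folded into a pre-/post-indexed form,
  // so it is rejected here.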
1877 
1878  // The update instruction source and destination register must be the
1879  // same as the load/store base register.
1880  if (MI.getOperand(0).getReg() != BaseReg ||
1881  MI.getOperand(1).getReg() != BaseReg)
1882  break;
1883 
1884  int UpdateOffset = MI.getOperand(2).getImm();
1885  if (MI.getOpcode() == AArch64::SUBXri)
1886  UpdateOffset = -UpdateOffset;
1887 
1888  // The immediate must be a multiple of the scaling factor of the pre/post
1889  // indexed instruction.
1890  int Scale, MinOffset, MaxOffset;
1891  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1892  if (UpdateOffset % Scale != 0)
1893  break;
1894 
1895  // Scaled offset must fit in the instruction immediate.
1896  int ScaledOffset = UpdateOffset / Scale;
1897  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1898  break;
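  // Illustrative numbers (editorial sketch): for a paired 8-byte access
  // (Scale == 8), an update of #24 yields ScaledOffset == 3, which must
  // also lie within [MinOffset, MaxOffset] to be foldable.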
1899 
1900  // If we have a non-zero Offset, we check that it matches the amount
1901  // we're adding to the register.
1902  if (!Offset || Offset == UpdateOffset)
1903  return true;
1904  break;
1905  }
1906  return false;
1907 }
1908 
1909 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1910  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1911  MachineBasicBlock::iterator E = I->getParent()->end();
1912  MachineInstr &MemMI = *I;
1913  MachineBasicBlock::iterator MBBI = I;
1914 
1915  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1916  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
1917  TII->getMemScale(MemMI);
1918 
1919  // Scan forward looking for post-index opportunities. Updating instructions
1920  // can't be formed if the memory instruction doesn't have the offset we're
1921  // looking for.
1922  if (MIUnscaledOffset != UnscaledOffset)
1923  return E;
1924 
1925  // If the base register overlaps a source/destination register, we can't
1926  // merge the update. This does not apply to tag store instructions, which
1927  // ignore the address part of the source register.
1928  // Nor does it apply to STGPi, which, unlike normal stores, has no
1929  // unpredictable behavior in this case and always performs writeback
1930  // after reading the source register value.
1931  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1932  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
1933  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1934  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1935  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1936  return E;
1937  }
1938  }
1939 
1940  // Track which register units have been modified and used between the first
1941  // insn (inclusive) and the second insn.
1942  ModifiedRegUnits.clear();
1943  UsedRegUnits.clear();
1944  MBBI = next_nodbg(MBBI, E);
1945 
1946  // We can't post-increment the stack pointer if any instruction between
1947  // the memory access (I) and the increment (MBBI) can access the memory
1948  // region defined by [SP, MBBI].
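  // Illustrative example (hypothetical operands):
  //   ldr x0, [sp]
  //   str x1, [x2]      ; x2 may point into [sp, sp + 16)
  //   add sp, sp, #16
  // folding to "ldr x0, [sp], #16" would deallocate that region before
  // the intervening store executes.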
1949  const bool BaseRegSP = BaseReg == AArch64::SP;
1950  if (BaseRegSP && needsWinCFI(I->getMF())) {
1951  // FIXME: For now, we always block the optimization over SP on Windows
1952  // targets, as it would require adjusting the unwind/debug info; messing
1953  // up the unwind info can actually cause a miscompile.
1954  return E;
1955  }
1956 
1957  for (unsigned Count = 0; MBBI != E && Count < Limit;
1958  MBBI = next_nodbg(MBBI, E)) {
1959  MachineInstr &MI = *MBBI;
1960 
1961  // Don't count transient instructions towards the search limit since there
1962  // may be different numbers of them if e.g. debug information is present.
1963  if (!MI.isTransient())
1964  ++Count;
1965 
1966  // If we found a match, return it.
1967  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1968  return MBBI;
1969 
1970  // Update the status of what the instruction clobbered and used.
1971  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1972 
1973  // Otherwise, if the base register is used or modified, we have no match, so
1974  // return early.
1975  // If we are optimizing SP, do not allow instructions that may load or store
1976  // in between the load and the optimized value update.
1977  if (!ModifiedRegUnits.available(BaseReg) ||
1978  !UsedRegUnits.available(BaseReg) ||
1979  (BaseRegSP && MBBI->mayLoadOrStore()))
1980  return E;
1981  }
1982  return E;
1983 }
1984 
1985 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1986  MachineBasicBlock::iterator I, unsigned Limit) {
1987  MachineBasicBlock::iterator B = I->getParent()->begin();
1988  MachineBasicBlock::iterator E = I->getParent()->end();
1989  MachineInstr &MemMI = *I;
1990  MachineBasicBlock::iterator MBBI = I;
1991  MachineFunction &MF = *MemMI.getMF();
1992 
1993  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1994  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
1995 
1996  // If the load/store is the first instruction in the block, there's obviously
1997  // not any matching update. Ditto if the memory offset isn't zero.
1998  if (MBBI == B || Offset != 0)
1999  return E;
2000  // If the base register overlaps a destination register, we can't
2001  // merge the update.
2002  if (!isTagStore(MemMI)) {
2003  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2004  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2005  Register DestReg = getLdStRegOp(MemMI, i).getReg();
2006  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2007  return E;
2008  }
2009  }
2010 
2011  const bool BaseRegSP = BaseReg == AArch64::SP;
2012  if (BaseRegSP && needsWinCFI(I->getMF())) {
2013  // FIXME: For now, we always block the optimization over SP on Windows
2014  // targets, as it would require adjusting the unwind/debug info; messing
2015  // up the unwind info can actually cause a miscompile.
2016  return E;
2017  }
2018 
2019  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2020  unsigned RedZoneSize =
2021  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2022 
2023  // Track which register units have been modified and used between the first
2024  // insn (inclusive) and the second insn.
2025  ModifiedRegUnits.clear();
2026  UsedRegUnits.clear();
2027  unsigned Count = 0;
2028  bool MemAccessBeforeSPPreInc = false;
2029  do {
2030  MBBI = prev_nodbg(MBBI, B);
2031  MachineInstr &MI = *MBBI;
2032 
2033  // Don't count transient instructions towards the search limit since there
2034  // may be different numbers of them if e.g. debug information is present.
2035  if (!MI.isTransient())
2036  ++Count;
2037 
2038  // If we found a match, return it.
2039  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2040  // Check that the update value is within our red zone limit (which may be
2041  // zero).
2042  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2043  return E;
2044  return MBBI;
2045  }
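  // Illustrative reasoning (editorial sketch): folding "sub sp, sp, #N"
  // into a later SP-based access moves the stack allocation down past any
  // intervening memory access; that access then runs while the N bytes
  // are still below SP, which is only safe if N fits in the red zone.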
2046 
2047  // Update the status of what the instruction clobbered and used.
2048  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2049 
2050  // Otherwise, if the base register is used or modified, we have no match, so
2051  // return early.
2052  if (!ModifiedRegUnits.available(BaseReg) ||
2053  !UsedRegUnits.available(BaseReg))
2054  return E;
2055  // Keep track of whether there is a memory access before an SP pre-increment;
2056  // in that case we must validate later that the update amount respects the
2057  // red zone.
2058  if (BaseRegSP && MBBI->mayLoadOrStore())
2059  MemAccessBeforeSPPreInc = true;
2060  } while (MBBI != B && Count < Limit);
2061  return E;
2062 }
2063 
2064 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2065  MachineBasicBlock::iterator &MBBI) {
2066  MachineInstr &MI = *MBBI;
2067  // If this is a volatile load, don't mess with it.
2068  if (MI.hasOrderedMemoryRef())
2069  return false;
2070 
2071  if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2072  return false;
2073 
2074  // Make sure this is a reg+imm.
2075  // FIXME: It is possible to extend it to handle reg+reg cases.
2076  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
2077  return false;
2078 
2079  // Look backward up to LdStLimit instructions.
2080  MachineBasicBlock::iterator StoreI;
2081  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2082  ++NumLoadsFromStoresPromoted;
2083  // Promote the load. Keeping the iterator straight is a
2084  // pain, so we let the merge routine tell us what the next instruction
2085  // is after it's done mucking about.
2086  MBBI = promoteLoadFromStore(MBBI, StoreI);
2087  return true;
2088  }
2089  return false;
2090 }
2091 
2092 // Merge adjacent zero stores into a wider store.
2093 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2094  MachineBasicBlock::iterator &MBBI) {
2095  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2096  MachineInstr &MI = *MBBI;
2097  MachineBasicBlock::iterator E = MI.getParent()->end();
2098 
2099  if (!TII->isCandidateToMergeOrPair(MI))
2100  return false;
2101 
2102  // Look ahead up to LdStLimit instructions for a mergable instruction.
2103  LdStPairFlags Flags;
2104  MachineBasicBlock::iterator MergeMI =
2105  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2106  if (MergeMI != E) {
2107  ++NumZeroStoresPromoted;
2108 
2109  // Keeping the iterator straight is a pain, so we let the merge routine tell
2110  // us what the next instruction is after it's done mucking about.
2111  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2112  return true;
2113  }
2114  return false;
2115 }
2116 
2117 // Find loads and stores that can be merged into a single load or store pair
2118 // instruction.
2119 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2120  MachineInstr &MI = *MBBI;
2121  MachineBasicBlock::iterator E = MI.getParent()->end();
2122 
2123  if (!TII->isCandidateToMergeOrPair(MI))
2124  return false;
2125 
2126  // Early exit if the offset is not possible to match. (6 bits of positive
2127  // range, plus allow an extra one in case we find a later insn that matches
2128  // with Offset-1)
2129  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2130  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2131  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2132  // Allow one more for offset.
2133  if (Offset > 0)
2134  Offset -= OffsetStride;
2135  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2136  return false;
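  // Illustrative numbers (editorial sketch): a scaled load with immediate
  // 64 is just outside the pair's [-64, 63] range, but after the
  // adjustment above (64 -> 63) scanning continues, since a later match
  // at immediate 63 would give the pair an in-range base offset.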
2137 
2138  // Look ahead up to LdStLimit instructions for a pairable instruction.
2139  LdStPairFlags Flags;
2140  MachineBasicBlock::iterator Paired =
2141  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2142  if (Paired != E) {
2143  ++NumPairCreated;
2144  if (TII->hasUnscaledLdStOffset(MI))
2145  ++NumUnscaledPairCreated;
2146  // Keeping the iterator straight is a pain, so we let the merge routine tell
2147  // us what the next instruction is after it's done mucking about.
2148  auto Prev = std::prev(MBBI);
2149  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2150  // Collect liveness info for instructions between Prev and the new position
2151  // MBBI.
2152  for (auto I = std::next(Prev); I != MBBI; I++)
2153  updateDefinedRegisters(*I, DefinedInBB, TRI);
2154 
2155  return true;
2156  }
2157  return false;
2158 }
2159 
2160 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2161  (MachineBasicBlock::iterator &MBBI) {
2162  MachineInstr &MI = *MBBI;
2163  MachineBasicBlock::iterator E = MI.getParent()->end();
2164  MachineBasicBlock::iterator Update;
2165 
2166  // Look forward to try to form a post-index instruction. For example,
2167  // ldr x0, [x20]
2168  // add x20, x20, #32
2169  // merged into:
2170  // ldr x0, [x20], #32
2171  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2172  if (Update != E) {
2173  // Merge the update into the ld/st.
2174  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2175  return true;
2176  }
2177 
2178  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2179  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2180  return false;
2181 
2182  // Look back to try to find a pre-index instruction. For example,
2183  // add x0, x0, #8
2184  // ldr x1, [x0]
2185  // merged into:
2186  // ldr x1, [x0, #8]!
2187  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2188  if (Update != E) {
2189  // Merge the update into the ld/st.
2190  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2191  return true;
2192  }
2193 
2194  // The immediate in the load/store is scaled by the size of the memory
2195  // operation. The immediate in the add we're looking for,
2196  // however, is not, so adjust here.
2197  int UnscaledOffset =
2198  AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
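  // Illustrative numbers: "ldr x1, [x0, #64]" encodes getImm() == 8 with
  // an 8-byte scale, so UnscaledOffset == 64, which is what a matching
  // "add x0, x0, #64" will add.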
2199 
2200  // Look forward to try to find a pre-index instruction. For example,
2201  // ldr x1, [x0, #64]
2202  // add x0, x0, #64
2203  // merged into:
2204  // ldr x1, [x0, #64]!
2205  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2206  if (Update != E) {
2207  // Merge the update into the ld/st.
2208  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2209  return true;
2210  }
2211 
2212  return false;
2213 }
2214 
2215 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2216  bool EnableNarrowZeroStOpt) {
2217 
2218  bool Modified = false;
2219  // Four transformations to do here:
2220  // 1) Find loads that directly read from stores and promote them by
2221  // replacing with mov instructions. If the store is wider than the load,
2222  // the load will be replaced with a bitfield extract.
2223  // e.g.,
2224  // str w1, [x0, #4]
2225  // ldrh w2, [x0, #6]
2226  // ; becomes
2227  // str w1, [x0, #4]
2228  // lsr w2, w1, #16
2229  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2230  MBBI != E;) {
2231  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2232  Modified = true;
2233  else
2234  ++MBBI;
2235  }
2236  // 2) Merge adjacent zero stores into a wider store.
2237  // e.g.,
2238  // strh wzr, [x0]
2239  // strh wzr, [x0, #2]
2240  // ; becomes
2241  // str wzr, [x0]
2242  // e.g.,
2243  // str wzr, [x0]
2244  // str wzr, [x0, #4]
2245  // ; becomes
2246  // str xzr, [x0]
2247  if (EnableNarrowZeroStOpt)
2248  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2249  MBBI != E;) {
2250  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2251  Modified = true;
2252  else
2253  ++MBBI;
2254  }
2255  // 3) Find loads and stores that can be merged into a single load or store
2256  // pair instruction.
2257  // e.g.,
2258  // ldr x0, [x2]
2259  // ldr x1, [x2, #8]
2260  // ; becomes
2261  // ldp x0, x1, [x2]
2262 
2263  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2264  DefinedInBB.clear();
2265  DefinedInBB.addLiveIns(MBB);
2266  }
2267 
2268  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2269  MBBI != E;) {
2270  // Track currently live registers up to this point, to help with
2271  // searching for a rename register on demand.
2272  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2273  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2274  Modified = true;
2275  else
2276  ++MBBI;
2277  }
2278  // 4) Find base register updates that can be merged into the load or store
2279  // as a base-reg writeback.
2280  // e.g.,
2281  // ldr x0, [x2]
2282  // add x2, x2, #4
2283  // ; becomes
2284  // ldr x0, [x2], #4
2285  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2286  MBBI != E;) {
2287  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2288  Modified = true;
2289  else
2290  ++MBBI;
2291  }
2292 
2293  return Modified;
2294 }
2295 
2296 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2297  if (skipFunction(Fn.getFunction()))
2298  return false;
2299 
2300  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
2301  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2302  TRI = Subtarget->getRegisterInfo();
2303  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2304 
2305  // Resize the modified and used register unit trackers. We do this once
2306  // per function and then clear the register units each time we optimize a load
2307  // or store.
2308  ModifiedRegUnits.init(*TRI);
2309  UsedRegUnits.init(*TRI);
2310  DefinedInBB.init(*TRI);
2311 
2312  bool Modified = false;
2313  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2314  for (auto &MBB : Fn) {
2315  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2316  Modified |= M;
2317  }
2318 
2319  return Modified;
2320 }
2321 
2322 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2323 // stores near one another? Note: The pre-RA instruction scheduler already has
2324 // hooks to try and schedule pairable loads/stores together to improve pairing
2325  // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2326 
2327 // FIXME: When pairing store instructions it's very possible for this pass to
2328 // hoist a store with a KILL marker above another use (without a KILL marker).
2329 // The resulting IR is invalid, but nothing uses the KILL markers after this
2330 // pass, so it's never caused a problem in practice.
2331 
2332 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2333 /// load / store optimization pass.
2334 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2335  return new AArch64LoadStoreOpt();
2336 }