//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled ops");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

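// All three options above are ordinary cl::opt flags, so (an illustrative
// usage sketch, assuming an AArch64 input module) they can be set when
// invoking the backend directly, or forwarded from clang via -mllvm:
//   llc -mtriple=aarch64 -aarch64-load-store-scan-limit=32 \
//       -aarch64-load-store-renaming=false input.ll
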
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  Optional<MCPhysReg> RenameReg = None;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = None; }
  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two indicated narrow zero stores into a single wider store
  // instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two indicated instructions into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from a store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
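
// For example (an illustrative sketch; register choices are arbitrary), this
// mapping is what turns two adjacent scalar accesses
//   ldr w0, [x2]          ; LDRWui
//   ldr w1, [x2, #4]      ; LDRWui at the adjacent offset
// into a single paired access, since getMatchingPairOpcode(LDRWui) == LDPWi:
//   ldp w0, w1, [x2]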

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
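
// For example (illustrative): folding an update of the base register into the
// access itself turns
//   add x0, x0, #16
//   str q0, [x0]           ; STRQui
// into the pre-indexed form STRQpre, which writes back the new base:
//   str q0, [x0, #16]!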

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
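
// For example (illustrative): an increment that happens after the access
//   ldr x1, [x0]           ; LDRXui
//   add x0, x0, #8
// maps onto the post-indexed form LDRXpost:
//   ldr x1, [x0], #8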

static bool isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STGPi:
    return true;
  }
}

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}

// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
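
// Worked example (illustrative): for LDPXi the scale is 8 and the immediate
// range is [-64, 63], so the pre/post-indexed variants can address byte
// offsets in [-512, 504] in steps of 8. For a non-paired, non-tag instruction
// such as LDRXpre the scale is 1, giving byte offsets in [-256, 255].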

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
  unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
  unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
  return MI.getOperand(Idx);
}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
                             ? getLdStOffsetOp(StoreInst).getImm()
                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
                             ? getLdStOffsetOp(LoadInst).getImm()
                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
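
// Worked example (illustrative): given
//   str x1, [x0, #8]       ; STRXui, StoreSize 8 -> covers bytes [8, 16)
//   ldr w2, [x0, #12]      ; LDRWui, LoadSize 4  -> reads  bytes [12, 16)
// the scaled immediates 1 and 3 unscale to UnscaledStOffset = 8 and
// UnscaledLdOffset = 12, and 12 + 4 <= 8 + 8, so the loaded bytes are fully
// contained in the stored bytes.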

static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // MergeForward also determines from which instruction we copy the base
  // register operand, so that the flags stay compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI;
  if (getLdStOffsetOp(*I).getImm() ==
      getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from the small type to the large type.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
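
// For example (illustrative): two adjacent halfword zero stores
//   strh wzr, [x0]         ; STRHHui
//   strh wzr, [x0, #2]     ; STRHHui
// are rewritten by mergeNarrowZeroStores into the single wider store
//   str wzr, [x0]          ; STRWui, via getMatchingWideOpcode(STRHHui)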

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit instructions.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instructions, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines from which instruction we copy the base
  // register operand, so that the flags stay compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);

  int Offset = getLdStOffsetOp(*I).getImm();
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*Paired)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));

  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);

  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    //   %w1 = KILL %w1, implicit-def %x1
    //   %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
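
// For example (an illustrative sketch; register choices are arbitrary): a
// sign-extending load paired with a plain load
//   ldrsw x0, [x2]         ; LDRSWui
//   ldr   w1, [x2, #4]     ; LDRWui
// becomes, with SExtIdx selecting the lane that must be re-extended:
//   ldp w0, w1, [x2]
//   %w0 = KILL %w0, implicit-def %x0
//   %x0 = SBFMXri killed %x0, 0, 31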

MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load, if the destination register of the load is the same
    // register as the one holding the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(0)
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    unsigned DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0);              // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}
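
// For example (illustrative): a load that reads bytes just stored
//   str  w1, [x0]          ; STRWui, bytes [0, 4)
//   ldrh w2, [x0, #2]      ; LDRHHui, bytes [2, 4)
// is replaced by a bitfield extract of the stored value:
//   ubfm w2, w1, #16, #31  ; UBFMWri, Immr = 8 * (2 - 0), Imms = Immr + 16 - 1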

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
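
// For example (illustrative): an LDURXi with byte offset 24 and OffsetStride 8
// becomes element offset 3, which fits the signed 7-bit pair range [-64, 63];
// a byte offset of 20 (not a multiple of 8) is rejected outright.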

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
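// E.g. (illustrative): alignTo(5, 4) == 8 and alignTo(-3, 4) == 0.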

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
      return true;

  return false;
}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also we can't handle stores without an immediate offset operand,
    // while the operand might be the address for a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X>pre - LDR<S,D,Q,W,X>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.
  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }
  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed, potentially
      // impacting other instructions we did not check. Bail out.
      // Note that this relies on the structure of the AArch64 register file.
      // In particular, a subregister cannot be written without overwriting the
      // whole register.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
                        << MI << ")\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //       1. Insert an extra copy, to materialize the def.
      //       2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                          << "\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
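
// For example (an illustrative sketch, using hypothetical MIR): in
//   renamable $q0 = LDRQui $x1, 0
//   STRQui renamable killed $q0, $x0, 0   ; FirstMI
//   renamable $q0 = LDRQui $x1, 1         ; clobbers $q0, blocking forward merge
//   STRQui renamable killed $q0, $x0, 1
// renaming $q0 in the first load/store (say, to an otherwise unused $q2)
// removes the clobber, so the two stores can later be combined into an STPQi.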

// Check if we can find a physical register for renaming. This register must:
// * not be defined up to FirstMI (checking DefinedInBB)
// * not be used between the MI and the defining instruction of the register to
//   rename (checked using UsedInBetween).
// * be available in all used register classes (checked using RequiredClasses).
static Optional<MCPhysReg> tryToFindRegisterToRename(
    MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  auto &MF = *FirstMI.getParent()->getParent();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return None;
}
1517 
1518 /// Scan the instructions looking for a load/store that can be combined with the
1519 /// current instruction into a wider equivalent or a load/store pair.
1521 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1522  LdStPairFlags &Flags, unsigned Limit,
1523  bool FindNarrowMerge) {
1524  MachineBasicBlock::iterator E = I->getParent()->end();
1526  MachineBasicBlock::iterator MBBIWithRenameReg;
1527  MachineInstr &FirstMI = *I;
1528  MBBI = next_nodbg(MBBI, E);
1529 
1530  bool MayLoad = FirstMI.mayLoad();
1531  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1532  Register Reg = getLdStRegOp(FirstMI).getReg();
1533  Register BaseReg = getLdStBaseOp(FirstMI).getReg();
1534  int Offset = getLdStOffsetOp(FirstMI).getImm();
1535  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1536  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1537 
1538  Optional<bool> MaybeCanRename = None;
1539  if (!EnableRenaming)
1540  MaybeCanRename = {false};
1541 
1543  LiveRegUnits UsedInBetween;
1544  UsedInBetween.init(*TRI);
1545 
1546  Flags.clearRenameReg();
1547 
1548  // Track which register units have been modified and used between the first
1549  // insn (inclusive) and the second insn.
1550  ModifiedRegUnits.clear();
1551  UsedRegUnits.clear();
1552 
1553  // Remember any instructions that read/write memory between FirstMI and MI.
1555 
1556  for (unsigned Count = 0; MBBI != E && Count < Limit;
1557  MBBI = next_nodbg(MBBI, E)) {
1558  MachineInstr &MI = *MBBI;
1559 
1560  UsedInBetween.accumulate(MI);
1561 
1562  // Don't count transient instructions towards the search limit since there
1563  // may be different numbers of them if e.g. debug information is present.
1564  if (!MI.isTransient())
1565  ++Count;
1566 
1567  Flags.setSExtIdx(-1);
1568  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1569  getLdStOffsetOp(MI).isImm()) {
1570  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1571  // If we've found another instruction with the same opcode, check to see
1572  // if the base and offset are compatible with our starting instruction.
1573  // These instructions all have scaled immediate operands, so we just
1574  // check for +1/-1. Make sure to check the new instruction offset is
1575  // actually an immediate and not a symbolic reference destined for
1576  // a relocation.
1577  Register MIBaseReg = getLdStBaseOp(MI).getReg();
1578  int MIOffset = getLdStOffsetOp(MI).getImm();
1579  bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
1580  if (IsUnscaled != MIIsUnscaled) {
1581  // We're trying to pair instructions that differ in how they are scaled.
1582  // If FirstMI is scaled then scale the offset of MI accordingly.
1583  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1584  int MemSize = TII->getMemScale(MI);
1585  if (MIIsUnscaled) {
1586  // If the unscaled offset isn't a multiple of the MemSize, we can't
1587  // pair the operations together: bail and keep looking.
1588  if (MIOffset % MemSize) {
1589  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1590  UsedRegUnits, TRI);
1591  MemInsns.push_back(&MI);
1592  continue;
1593  }
1594  MIOffset /= MemSize;
1595  } else {
1596  MIOffset *= MemSize;
1597  }
1598  }
1599 
1600  bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
1601 
1602  if (BaseReg == MIBaseReg) {
1603  // If the offset of the second ld/st is not equal to the size of the
1604  // destination register it can’t be paired with a pre-index ld/st
1605  // pair. Additionally if the base reg is used or modified the operations
1606  // can't be paired: bail and keep looking.
1607  if (IsPreLdSt) {
1608  bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1609  bool IsBaseRegUsed =
1610  !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
1611  bool IsBaseRegModified =
1612  !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
1613  // If the stored value and the address of the second instruction is
1614  // the same, it needs to be using the updated register and therefore
1615  // it must not be folded.
1616  bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
1617  getLdStBaseOp(MI).getReg());
1618  if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
1619  IsMIRegTheSame) {
1620  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1621  UsedRegUnits, TRI);
1622  MemInsns.push_back(&MI);
1623  continue;
1624  }
1625  } else {
1626  if ((Offset != MIOffset + OffsetStride) &&
1627  (Offset + OffsetStride != MIOffset)) {
1628  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1629  UsedRegUnits, TRI);
1630  MemInsns.push_back(&MI);
1631  continue;
1632  }
1633  }
1634 
1635  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1636  if (FindNarrowMerge) {
1637  // If the alignment requirements of the scaled wide load/store
1638  // instruction can't express the offset of the scaled narrow input,
1639  // bail and keep looking. For promotable zero stores, allow only when
1640  // the stored value is the same (i.e., WZR).
1641  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1642  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1643  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1644  UsedRegUnits, TRI);
1645  MemInsns.push_back(&MI);
1646  continue;
1647  }
1648  } else {
1649  // Pairwise instructions have a 7-bit signed offset field. Single
1650  // insns have a 12-bit unsigned offset field. If the resultant
1651  // immediate offset of merging these instructions is out of range for
1652  // a pairwise instruction, bail and keep looking.
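 // E.g., for 64-bit ldp the scaled immediate must lie in [-64, 63] (byte
 // offsets -512..504), whereas a single ldr accepts 0..4095 (byte offsets
 // 0..32760).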
1653  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1654  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1655  UsedRegUnits, TRI);
1656  MemInsns.push_back(&MI);
1657  continue;
1658  }
1659  // If the alignment requirements of the paired (scaled) instruction
1660  // can't express the offset of the unscaled input, bail and keep
1661  // looking.
1662  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1663  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1664  UsedRegUnits, TRI);
1665  MemInsns.push_back(&MI);
1666  continue;
1667  }
1668  }
1669  // If the destination register of one load is the same register or a
1670  // sub/super register of the other load, bail and keep looking. A
1671  // load-pair instruction with both destination registers the same is
1672  // UNPREDICTABLE and will result in an exception.
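 // E.g., ldp x0, x0, [x1] must never be formed.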
1673  if (MayLoad &&
1674  TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
1675  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1676  TRI);
1677  MemInsns.push_back(&MI);
1678  continue;
1679  }
1680 
1681  // If the BaseReg has been modified, then we cannot do the optimization.
1682  // For example, in the following pattern
1683  // ldr x1, [x2]
1684  // ldr x2, [x3]
1685  // ldr x4, [x2, #8]
1686  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1687  if (!ModifiedRegUnits.available(BaseReg))
1688  return E;
1689 
1690  // If the Rt of the second instruction was not modified or used between
1691  // the two instructions and none of the instructions between the second
1692  // and first alias with the second, we can combine the second into the
1693  // first.
1694  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1695  !(MI.mayLoad() &&
1696  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1697  !mayAlias(MI, MemInsns, AA)) {
1698 
1699  Flags.setMergeForward(false);
1700  Flags.clearRenameReg();
1701  return MBBI;
1702  }
1703 
1704  // Likewise, if the Rt of the first instruction is not modified or used
1705  // between the two instructions and none of the instructions between the
1706  // first and the second alias with the first, we can combine the first
1707  // into the second.
1708  if (!(MayLoad &&
1709  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1710  !mayAlias(FirstMI, MemInsns, AA)) {
1711 
1712  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1713  Flags.setMergeForward(true);
1714  Flags.clearRenameReg();
1715  return MBBI;
1716  }
1717 
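 // Illustrative renaming sketch (register choices are arbitrary; assumes
 // w9 is free at this point): in
 //   mov w8, #1
 //   str w8, [x0]
 //   mov w8, #2
 //   str w8, [x0, #4]
 // the first w8 can be renamed (up to its definition) to w9, enabling
 //   mov w9, #1
 //   mov w8, #2
 //   stp w9, w8, [x0]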
1718  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1719  if (!MaybeCanRename)
1720  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1721  RequiredClasses, TRI)};
1722 
1723  if (*MaybeCanRename) {
1724  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
1725  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
1726  TRI);
1727  if (MaybeRenameReg) {
1728  Flags.setRenameReg(*MaybeRenameReg);
1729  Flags.setMergeForward(true);
1730  MBBIWithRenameReg = MBBI;
1731  }
1732  }
1733  }
1734  }
1735  // Unable to combine these instructions due to interference in between.
1736  // Keep looking.
1737  }
1738  }
1739 
1740  if (Flags.getRenameReg())
1741  return MBBIWithRenameReg;
1742 
1743  // The instruction wasn't a matching load or store. Stop searching if we
1744  // encounter a call instruction that might modify memory.
1745  if (MI.isCall())
1746  return E;
1747 
1748  // Update modified / used register units.
1749  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1750 
1751  // Otherwise, if the base register is modified, we have no match, so
1752  // return early.
1753  if (!ModifiedRegUnits.available(BaseReg))
1754  return E;
1755 
1756  // Update list of instructions that read/write memory.
1757  if (MI.mayLoadOrStore())
1758  MemInsns.push_back(&MI);
1759  }
1760  return E;
1761 }
1762 
1763 MachineBasicBlock::iterator
1764 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1765  MachineBasicBlock::iterator Update,
1766  bool IsPreIdx) {
1767  assert((Update->getOpcode() == AArch64::ADDXri ||
1768  Update->getOpcode() == AArch64::SUBXri) &&
1769  "Unexpected base register update instruction to merge!");
1770  MachineBasicBlock::iterator E = I->getParent()->end();
1771  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1772  // Return the instruction following the merged instruction, which is
1773  // the instruction following our unmerged load. Unless that's the add/sub
1774  // instruction we're merging, in which case it's the one after that.
1775  if (NextI == Update)
1776  NextI = next_nodbg(NextI, E);
1777 
1778  int Value = Update->getOperand(2).getImm();
1779  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1780  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1781  if (Update->getOpcode() == AArch64::SUBXri)
1782  Value = -Value;
1783 
1784  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1785  : getPostIndexedOpcode(I->getOpcode());
1786  MachineInstrBuilder MIB;
1787  int Scale, MinOffset, MaxOffset;
1788  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
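 // The write-back immediate is encoded scaled. E.g., folding
 // add x1, x1, #16 into ldp x2, x3, [x1] yields ldp x2, x3, [x1], #16,
 // whose encoded immediate is 16 / 8 = 2.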
1789  if (!isPairedLdSt(*I)) {
1790  // Non-paired instruction.
1791  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1792  .add(getLdStRegOp(*Update))
1793  .add(getLdStRegOp(*I))
1794  .add(getLdStBaseOp(*I))
1795  .addImm(Value / Scale)
1796  .setMemRefs(I->memoperands())
1797  .setMIFlags(I->mergeFlagsWith(*Update));
1798  } else {
1799  // Paired instruction.
1800  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1801  .add(getLdStRegOp(*Update))
1802  .add(getLdStRegOp(*I, 0))
1803  .add(getLdStRegOp(*I, 1))
1804  .add(getLdStBaseOp(*I))
1805  .addImm(Value / Scale)
1806  .setMemRefs(I->memoperands())
1807  .setMIFlags(I->mergeFlagsWith(*Update));
1808  }
1809  (void)MIB;
1810 
1811  if (IsPreIdx) {
1812  ++NumPreFolded;
1813  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1814  } else {
1815  ++NumPostFolded;
1816  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1817  }
1818  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1819  LLVM_DEBUG(I->print(dbgs()));
1820  LLVM_DEBUG(dbgs() << " ");
1821  LLVM_DEBUG(Update->print(dbgs()));
1822  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1823  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1824  LLVM_DEBUG(dbgs() << "\n");
1825 
1826  // Erase the old instructions for the block.
1827  I->eraseFromParent();
1828  Update->eraseFromParent();
1829 
1830  return NextI;
1831 }
1832 
1833 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1834  MachineInstr &MI,
1835  unsigned BaseReg, int Offset) {
1836  switch (MI.getOpcode()) {
1837  default:
1838  break;
1839  case AArch64::SUBXri:
1840  case AArch64::ADDXri:
1841  // Make sure it's a vanilla immediate operand, not a relocation or
1842  // anything else we can't handle.
1843  if (!MI.getOperand(2).isImm())
1844  break;
1845  // Watch out for 1 << 12 shifted value.
1846  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1847  break;
1848 
1849  // The update instruction source and destination register must be the
1850  // same as the load/store base register.
1851  if (MI.getOperand(0).getReg() != BaseReg ||
1852  MI.getOperand(1).getReg() != BaseReg)
1853  break;
1854 
1855  int UpdateOffset = MI.getOperand(2).getImm();
1856  if (MI.getOpcode() == AArch64::SUBXri)
1857  UpdateOffset = -UpdateOffset;
1858 
1859  // The immediate must be a multiple of the scaling factor of the pre/post
1860  // indexed instruction.
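 // E.g., add x1, x1, #4 cannot be folded into a 64-bit ldp, whose scale
 // is 8.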
1861  int Scale, MinOffset, MaxOffset;
1862  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1863  if (UpdateOffset % Scale != 0)
1864  break;
1865 
1866  // Scaled offset must fit in the instruction immediate.
1867  int ScaledOffset = UpdateOffset / Scale;
1868  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1869  break;
1870 
1871  // If we have a non-zero Offset, we check that it matches the amount
1872  // we're adding to the register.
1873  if (!Offset || Offset == UpdateOffset)
1874  return true;
1875  break;
1876  }
1877  return false;
1878 }
1879 
1880 static bool needsWinCFI(const MachineFunction *MF) {
1881  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1882  MF->getFunction().needsUnwindTableEntry();
1883 }
1884 
1885 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1886  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1887  MachineBasicBlock::iterator E = I->getParent()->end();
1888  MachineInstr &MemMI = *I;
1889  MachineBasicBlock::iterator MBBI = I;
1890 
1891  Register BaseReg = getLdStBaseOp(MemMI).getReg();
1892  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
1893 
1894  // Scan forward looking for post-index opportunities. Updating instructions
1895  // can't be formed if the memory instruction doesn't have the offset we're
1896  // looking for.
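 // E.g., ldr x0, [x1] followed by add x1, x1, #8 can become
 // ldr x0, [x1], #8 only because the load's own offset is 0.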
1897  if (MIUnscaledOffset != UnscaledOffset)
1898  return E;
1899 
1900  // If the base register overlaps a source/destination register, we can't
1901  // merge the update. This does not apply to tag store instructions, which
1902  // ignore the address part of the source register.
1903  // Nor does it apply to STGPi, which, unlike normal stores, does not have
1904  // unpredictable behavior in this case and always performs the writeback
1905  // after reading the source register value.
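 // E.g., ldr x1, [x1] followed by add x1, x1, #8 must not become
 // ldr x1, [x1], #8, whose write-back to the destination register is
 // UNPREDICTABLE.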
1906  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1907  bool IsPairedInsn = isPairedLdSt(MemMI);
1908  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1909  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1910  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1911  return E;
1912  }
1913  }
1914 
1915  // Track which register units have been modified and used between the first
1916  // insn (inclusive) and the second insn.
1917  ModifiedRegUnits.clear();
1918  UsedRegUnits.clear();
1919  MBBI = next_nodbg(MBBI, E);
1920 
1921  // We can't post-increment the stack pointer if any instruction between
1922  // the memory access (I) and the increment (MBBI) can access the memory
1923  // region defined by [SP, MBBI].
1924  const bool BaseRegSP = BaseReg == AArch64::SP;
1925  if (BaseRegSP && needsWinCFI(I->getMF())) {
1926  // FIXME: For now, we always block the optimization over SP on Windows
1927  // targets, as it would require adjusting the unwind/debug info; messing up
1928  // the unwind info can actually cause a miscompile.
1929  return E;
1930  }
1931 
1932  for (unsigned Count = 0; MBBI != E && Count < Limit;
1933  MBBI = next_nodbg(MBBI, E)) {
1934  MachineInstr &MI = *MBBI;
1935 
1936  // Don't count transient instructions towards the search limit since there
1937  // may be different numbers of them if e.g. debug information is present.
1938  if (!MI.isTransient())
1939  ++Count;
1940 
1941  // If we found a match, return it.
1942  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1943  return MBBI;
1944 
1945  // Update the status of what the instruction clobbered and used.
1946  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1947 
1948  // Otherwise, if the base register is used or modified, we have no match, so
1949  // return early.
1950  // If we are optimizing SP, do not allow instructions that may load or store
1951  // in between the load and the optimized value update.
1952  if (!ModifiedRegUnits.available(BaseReg) ||
1953  !UsedRegUnits.available(BaseReg) ||
1954  (BaseRegSP && MBBI->mayLoadOrStore()))
1955  return E;
1956  }
1957  return E;
1958 }
1959 
1960 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1961  MachineBasicBlock::iterator I, unsigned Limit) {
1962  MachineBasicBlock::iterator B = I->getParent()->begin();
1963  MachineBasicBlock::iterator E = I->getParent()->end();
1964  MachineInstr &MemMI = *I;
1965  MachineBasicBlock::iterator MBBI = I;
1966  MachineFunction &MF = *MemMI.getMF();
1967 
1968  Register BaseReg = getLdStBaseOp(MemMI).getReg();
1969  int Offset = getLdStOffsetOp(MemMI).getImm();
1970 
1971  // If the load/store is the first instruction in the block, there's
1972  // obviously no matching update. Ditto if the memory offset isn't zero.
1973  if (MBBI == B || Offset != 0)
1974  return E;
1975  // If the base register overlaps a destination register, we can't
1976  // merge the update.
1977  if (!isTagStore(MemMI)) {
1978  bool IsPairedInsn = isPairedLdSt(MemMI);
1979  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1980  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1981  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1982  return E;
1983  }
1984  }
1985 
1986  const bool BaseRegSP = BaseReg == AArch64::SP;
1987  if (BaseRegSP && needsWinCFI(I->getMF())) {
1988  // FIXME: For now, we always block the optimization over SP on Windows
1989  // targets, as it would require adjusting the unwind/debug info; messing up
1990  // the unwind info can actually cause a miscompile.
1991  return E;
1992  }
1993 
1994  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1995  unsigned RedZoneSize =
1996  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
1997 
1998  // Track which register units have been modified and used between the first
1999  // insn (inclusive) and the second insn.
2000  ModifiedRegUnits.clear();
2001  UsedRegUnits.clear();
2002  unsigned Count = 0;
2003  bool MemAccessBeforeSPPreInc = false;
2004  do {
2005  MBBI = prev_nodbg(MBBI, B);
2006  MachineInstr &MI = *MBBI;
2007 
2008  // Don't count transient instructions towards the search limit since there
2009  // may be different numbers of them if e.g. debug information is present.
2010  if (!MI.isTransient())
2011  ++Count;
2012 
2013  // If we found a match, return it.
2014  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2015  // Check that the update value is within our red zone limit (which may be
2016  // zero).
2017  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2018  return E;
2019  return MBBI;
2020  }
2021 
2022  // Update the status of what the instruction clobbered and used.
2023  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2024 
2025  // Otherwise, if the base register is used or modified, we have no match, so
2026  // return early.
2027  if (!ModifiedRegUnits.available(BaseReg) ||
2028  !UsedRegUnits.available(BaseReg))
2029  return E;
2030  // Keep track of whether we have a memory access before an SP pre-increment;
2031  // in that case we need to validate later that the update amount respects
2032  // the red zone.
2033  if (BaseRegSP && MBBI->mayLoadOrStore())
2034  MemAccessBeforeSPPreInc = true;
2035  } while (MBBI != B && Count < Limit);
2036  return E;
2037 }
2038 
2039 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2040  MachineBasicBlock::iterator &MBBI) {
2041  MachineInstr &MI = *MBBI;
2042  // If this is a volatile load, don't mess with it.
2043  if (MI.hasOrderedMemoryRef())
2044  return false;
2045 
2046  // Make sure this is a reg+imm.
2047  // FIXME: It is possible to extend it to handle reg+reg cases.
2048  if (!getLdStOffsetOp(MI).isImm())
2049  return false;
2050 
2051  // Look backward up to LdStLimit instructions.
2052  MachineBasicBlock::iterator StoreI;
2053  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2054  ++NumLoadsFromStoresPromoted;
2055  // Promote the load. Keeping the iterator straight is a
2056  // pain, so we let the merge routine tell us what the next instruction
2057  // is after it's done mucking about.
2058  MBBI = promoteLoadFromStore(MBBI, StoreI);
2059  return true;
2060  }
2061  return false;
2062 }
2063 
2064 // Merge adjacent zero stores into a wider store.
2065 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2066  MachineBasicBlock::iterator &MBBI) {
2067  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2068  MachineInstr &MI = *MBBI;
2069  MachineBasicBlock::iterator E = MI.getParent()->end();
2070 
2071  if (!TII->isCandidateToMergeOrPair(MI))
2072  return false;
2073 
2074  // Look ahead up to LdStLimit instructions for a mergable instruction.
2075  LdStPairFlags Flags;
2076  MachineBasicBlock::iterator MergeMI =
2077  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2078  if (MergeMI != E) {
2079  ++NumZeroStoresPromoted;
2080 
2081  // Keeping the iterator straight is a pain, so we let the merge routine tell
2082  // us what the next instruction is after it's done mucking about.
2083  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2084  return true;
2085  }
2086  return false;
2087 }
2088 
2089 // Find loads and stores that can be merged into a single load or store pair
2090 // instruction.
2091 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2092  MachineInstr &MI = *MBBI;
2093  MachineBasicBlock::iterator E = MI.getParent()->end();
2094 
2095  if (!TII->isCandidateToMergeOrPair(MI))
2096  return false;
2097 
2098  // Early exit if the offset cannot possibly match: the pair immediate has
2099  // only 6 bits of positive range, and we allow one extra stride in case a
2100  // later insn matches at Offset-1.
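 // E.g., a 64-bit ldr at scaled offset 64 is itself out of ldp range
 // ([-64, 63]) but can still form the second element of a pair anchored
 // at offset 63.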
2101  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2102  int Offset = getLdStOffsetOp(MI).getImm();
2103  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2104  // Allow one more for offset.
2105  if (Offset > 0)
2106  Offset -= OffsetStride;
2107  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2108  return false;
2109 
2110  // Look ahead up to LdStLimit instructions for a pairable instruction.
2111  LdStPairFlags Flags;
2112  MachineBasicBlock::iterator Paired =
2113  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2114  if (Paired != E) {
2115  ++NumPairCreated;
2116  if (TII->hasUnscaledLdStOffset(MI))
2117  ++NumUnscaledPairCreated;
2118  // Keeping the iterator straight is a pain, so we let the merge routine tell
2119  // us what the next instruction is after it's done mucking about.
2120  auto Prev = std::prev(MBBI);
2121  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2122  // Collect liveness info for instructions between Prev and the new position
2123  // MBBI.
2124  for (auto I = std::next(Prev); I != MBBI; I++)
2125  updateDefinedRegisters(*I, DefinedInBB, TRI);
2126 
2127  return true;
2128  }
2129  return false;
2130 }
2131 
2132 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2133  (MachineBasicBlock::iterator &MBBI) {
2134  MachineInstr &MI = *MBBI;
2135  MachineBasicBlock::iterator E = MI.getParent()->end();
2136  MachineBasicBlock::iterator Update;
2137 
2138  // Look forward to try to form a post-index instruction. For example,
2139  // ldr x0, [x20]
2140  // add x20, x20, #32
2141  // merged into:
2142  // ldr x0, [x20], #32
2143  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2144  if (Update != E) {
2145  // Merge the update into the ld/st.
2146  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2147  return true;
2148  }
2149 
2150  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2151  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2152  return false;
2153 
2154  // Look back to try to find a pre-index instruction. For example,
2155  // add x0, x0, #8
2156  // ldr x1, [x0]
2157  // merged into:
2158  // ldr x1, [x0, #8]!
2159  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2160  if (Update != E) {
2161  // Merge the update into the ld/st.
2162  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2163  return true;
2164  }
2165 
2166  // The immediate in the load/store is scaled by the size of the memory
2167  // operation. The immediate in the add we're looking for,
2168  // however, is not, so adjust here.
2169  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2170 
2171  // Look forward to try to find a pre-index instruction. For example,
2172  // ldr x1, [x0, #64]
2173  // add x0, x0, #64
2174  // merged into:
2175  // ldr x1, [x0, #64]!
2176  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2177  if (Update != E) {
2178  // Merge the update into the ld/st.
2179  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2180  return true;
2181  }
2182 
2183  return false;
2184 }
2185 
2186 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2187  bool EnableNarrowZeroStOpt) {
2188 
2189  bool Modified = false;
2190  // Four transformations to do here:
2191  // 1) Find loads that directly read from stores and promote them by
2192  // replacing with mov instructions. If the store is wider than the load,
2193  // the load will be replaced with a bitfield extract.
2194  // e.g.,
2195  // str w1, [x0, #4]
2196  // ldrh w2, [x0, #6]
2197  // ; becomes
2198  // str w1, [x0, #4]
2199  // lsr w2, w1, #16
2200  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2201  MBBI != E;) {
2202  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2203  Modified = true;
2204  else
2205  ++MBBI;
2206  }
2207  // 2) Merge adjacent zero stores into a wider store.
2208  // e.g.,
2209  // strh wzr, [x0]
2210  // strh wzr, [x0, #2]
2211  // ; becomes
2212  // str wzr, [x0]
2213  // e.g.,
2214  // str wzr, [x0]
2215  // str wzr, [x0, #4]
2216  // ; becomes
2217  // str xzr, [x0]
2218  if (EnableNarrowZeroStOpt)
2219  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2220  MBBI != E;) {
2221  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2222  Modified = true;
2223  else
2224  ++MBBI;
2225  }
2226  // 3) Find loads and stores that can be merged into a single load or store
2227  // pair instruction.
2228  // e.g.,
2229  // ldr x0, [x2]
2230  // ldr x1, [x2, #8]
2231  // ; becomes
2232  // ldp x0, x1, [x2]
2233 
2234  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2235  DefinedInBB.clear();
2236  DefinedInBB.addLiveIns(MBB);
2237  }
2238 
2239  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2240  MBBI != E;) {
2241  // Track currently live registers up to this point, to help with
2242  // searching for a rename register on demand.
2243  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2244  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2245  Modified = true;
2246  else
2247  ++MBBI;
2248  }
2249  // 4) Find base register updates that can be merged into the load or store
2250  // as a base-reg writeback.
2251  // e.g.,
2252  // ldr x0, [x2]
2253  // add x2, x2, #4
2254  // ; becomes
2255  // ldr x0, [x2], #4
2256  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2257  MBBI != E;) {
2258  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2259  Modified = true;
2260  else
2261  ++MBBI;
2262  }
2263 
2264  return Modified;
2265 }
2266 
2267 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2268  if (skipFunction(Fn.getFunction()))
2269  return false;
2270 
2271  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
2272  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2273  TRI = Subtarget->getRegisterInfo();
2274  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2275 
2276  // Resize the modified and used register unit trackers. We do this once
2277  // per function and then clear the register units each time we optimize a load
2278  // or store.
2279  ModifiedRegUnits.init(*TRI);
2280  UsedRegUnits.init(*TRI);
2281  DefinedInBB.init(*TRI);
2282 
2283  bool Modified = false;
2284  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2285  for (auto &MBB : Fn) {
2286  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2287  Modified |= M;
2288  }
2289 
2290  return Modified;
2291 }
2292 
2293 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2294 // stores near one another? Note: The pre-RA instruction scheduler already has
2295 // hooks to try and schedule pairable loads/stores together to improve pairing
2296 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2297 
2298 // FIXME: When pairing store instructions it's very possible for this pass to
2299 // hoist a store with a KILL marker above another use (without a KILL marker).
2300 // The resulting IR is invalid, but nothing uses the KILL markers after this
2301 // pass, so it's never caused a problem in practice.
2302 
2303 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2304 /// load / store optimization pass.
2305 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2306  return new AArch64LoadStoreOpt();
2307 }