LLVM 23.0.0git
AArch64LoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions, one of which
15// has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad-hoc fix
16// to unwind information.
17//
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
24#include "llvm/ADT/SetVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringRef.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/MC/MCAsmInfo.h"
40#include "llvm/MC/MCDwarf.h"
41#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <cstdint>
48#include <functional>
49#include <iterator>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "aarch64-ldst-opt"
56
57STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded, "Number of post-index updates folded");
59STATISTIC(NumPreFolded, "Number of pre-index updates folded");
60STATISTIC(NumUnscaledPairCreated,
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
65 "not passed the alignment check");
66STATISTIC(NumConstOffsetFolded,
67 "Number of const offset of index address folded");
68STATISTIC(NumUMOVFoldedToFPRStore,
69 "Number of UMOV + GPR stores folded to FPR stores");
70
71DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
72 "Controls which pairs are considered for renaming");
73
74// The LdStLimit limits how far we search for load/store pairs.
75static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
76 cl::init(20), cl::Hidden);
77
78// The UpdateLimit limits how far we search for update instructions when we form
79// pre-/post-index instructions.
80static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
82
83// The LdStConstLimit limits how far we search for const offset instructions
84// when we form index address load/store instructions.
85static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
86 cl::init(10), cl::Hidden);
87
88// The UMOVFoldLimit limits how far back we scan from a GPR store to find a
89// UMOV that can be folded into a direct FPR store.
90static cl::opt<unsigned> UMOVFoldLimit("aarch64-umov-fold-scan-limit",
91 cl::init(16), cl::Hidden);
92
93// Enable register renaming to find additional store pairing opportunities.
94static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
95 cl::init(true), cl::Hidden);
96
97#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
98
99namespace {
100
101using LdStPairFlags = struct LdStPairFlags {
102 // If a matching instruction is found, MergeForward is set to true if the
103 // merge is to remove the first instruction and replace the second with
104 // a pair-wise insn, and false if the reverse is true.
105 bool MergeForward = false;
106
107 // SExtIdx gives the index of the result of the load pair that must be
108 // extended. The value of SExtIdx assumes that the paired load produces the
109 // value in this order: (I, returned iterator), i.e., -1 means no value has
110 // to be extended, 0 means I, and 1 means the returned iterator.
111 int SExtIdx = -1;
112
113 // If not none, RenameReg can be used to rename the result register of the
114 // first store in a pair. Currently this only works when merging stores
115 // forward.
116 std::optional<MCPhysReg> RenameReg;
117
118 LdStPairFlags() = default;
119
120 void setMergeForward(bool V = true) { MergeForward = V; }
121 bool getMergeForward() const { return MergeForward; }
122
123 void setSExtIdx(int V) { SExtIdx = V; }
124 int getSExtIdx() const { return SExtIdx; }
125
126 void setRenameReg(MCPhysReg R) { RenameReg = R; }
127 void clearRenameReg() { RenameReg = std::nullopt; }
128 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
129};
130
131struct AArch64LoadStoreOpt {
133 const AArch64InstrInfo *TII;
134 const TargetRegisterInfo *TRI;
135 const AArch64Subtarget *Subtarget;
136
137 // Track which register units have been modified and used.
138 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
139 LiveRegUnits DefinedInBB;
140
141 // Scan the instructions looking for a load/store that can be combined
142 // with the current instruction into a load/store pair.
143 // Return the matching instruction if one is found, else MBB->end().
145 LdStPairFlags &Flags,
146 unsigned Limit,
147 bool FindNarrowMerge);
148
149 // Scan the instructions looking for a store that writes to the address from
150 // which the current load instruction reads. Return true if one is found.
151 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
153
154 // Merge the two narrow store instructions indicated into a single wider store instruction.
156 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
158 const LdStPairFlags &Flags);
159
160 // Merge the two instructions indicated into a single pair-wise instruction.
162 mergePairedInsns(MachineBasicBlock::iterator I,
164 const LdStPairFlags &Flags);
165
166 // Promote the load that reads directly from the address stored to.
168 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
170
171 // Scan the instruction list to find a base register update that can
172 // be combined with the current instruction (a load or store) using
173 // pre or post indexed addressing with writeback. Scan forwards.
175 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
176 int UnscaledOffset, unsigned Limit);
177
178 // Scan the instruction list to find a register assigned with a const
179 // value that can be combined with the current instruction (a load or store)
180 // using base addressing with writeback. Scan backwards.
182 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
183 unsigned &Offset);
184
185 // Scan the instruction list to find a base register update that can
186 // be combined with the current instruction (a load or store) using
187 // pre or post indexed addressing with writeback. Scan backwards.
188 // `MergeEither` is set to true if the combined instruction may be placed
189 // either at the location of the load/store instruction or at the location of
190 // the update instruction.
192 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
193 bool &MergeEither);
194
195 // Find an instruction that updates the base register of the ld/st
196 // instruction.
197 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
198 unsigned BaseReg, int Offset);
199
200 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
201 unsigned IndexReg, unsigned &Offset);
202
203 // Merge a pre- or post-index base register update into a ld/st instruction.
204 std::optional<MachineBasicBlock::iterator>
205 mergeUpdateInsn(MachineBasicBlock::iterator I,
206 MachineBasicBlock::iterator Update, bool IsForward,
207 bool IsPreIdx, bool MergeEither);
208
210 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
211 MachineBasicBlock::iterator Update, unsigned Offset,
212 int Scale);
213
214 // Find and merge zero store instructions.
215 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
216
217 // Find and pair ldr/str instructions.
218 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
219
220 // Find and promote load instructions which read directly from store.
221 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
222
223 // Find and merge base register updates before or after a ld/st instruction.
224 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
225
226 // Find and merge an index ldr/st instruction into a base ld/st instruction.
227 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
228
229 // Replace a UMOV (lane 0) + GPR store with a direct FPR sub-register store.
230 bool tryToReplaceUMOVStore(MachineBasicBlock::iterator &MBBI);
231
232 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
233
234 bool runOnMachineFunction(MachineFunction &MF);
235};
236
237struct AArch64LoadStoreOptLegacy : public MachineFunctionPass {
238 static char ID;
239
240 AArch64LoadStoreOptLegacy() : MachineFunctionPass(ID) {}
241
242 bool runOnMachineFunction(MachineFunction &Fn) override;
243
244 void getAnalysisUsage(AnalysisUsage &AU) const override {
247 }
248
249 MachineFunctionProperties getRequiredProperties() const override {
250 return MachineFunctionProperties().setNoVRegs();
251 }
252
253 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
254};
255
256char AArch64LoadStoreOptLegacy::ID = 0;
257
258} // end anonymous namespace
259
260INITIALIZE_PASS(AArch64LoadStoreOptLegacy, "aarch64-ldst-opt",
261 AARCH64_LOAD_STORE_OPT_NAME, false, false)
262
263static bool isNarrowStore(unsigned Opc) {
264 switch (Opc) {
265 default:
266 return false;
267 case AArch64::STRBBui:
268 case AArch64::STURBBi:
269 case AArch64::STRHHui:
270 case AArch64::STURHHi:
271 return true;
272 }
273}
274
275// These instruction set memory tag and either keep memory contents unchanged or
276// set it to zero, ignoring the address part of the source register.
277static bool isTagStore(const MachineInstr &MI) {
278 switch (MI.getOpcode()) {
279 default:
280 return false;
281 case AArch64::STGi:
282 case AArch64::STZGi:
283 case AArch64::ST2Gi:
284 case AArch64::STZ2Gi:
285 return true;
286 }
287}
288
289static unsigned getMatchingNonSExtOpcode(unsigned Opc,
290 bool *IsValidLdStrOpc = nullptr) {
291 if (IsValidLdStrOpc)
292 *IsValidLdStrOpc = true;
293 switch (Opc) {
294 default:
295 if (IsValidLdStrOpc)
296 *IsValidLdStrOpc = false;
297 return std::numeric_limits<unsigned>::max();
298 case AArch64::STRDui:
299 case AArch64::STURDi:
300 case AArch64::STRDpre:
301 case AArch64::STRQui:
302 case AArch64::STURQi:
303 case AArch64::STRQpre:
304 case AArch64::STRBBui:
305 case AArch64::STURBBi:
306 case AArch64::STRHHui:
307 case AArch64::STURHHi:
308 case AArch64::STRWui:
309 case AArch64::STRWpre:
310 case AArch64::STURWi:
311 case AArch64::STRXui:
312 case AArch64::STRXpre:
313 case AArch64::STURXi:
314 case AArch64::STR_ZXI:
315 case AArch64::LDRDui:
316 case AArch64::LDURDi:
317 case AArch64::LDRDpre:
318 case AArch64::LDRQui:
319 case AArch64::LDURQi:
320 case AArch64::LDRQpre:
321 case AArch64::LDRWui:
322 case AArch64::LDURWi:
323 case AArch64::LDRWpre:
324 case AArch64::LDRXui:
325 case AArch64::LDURXi:
326 case AArch64::LDRXpre:
327 case AArch64::STRSui:
328 case AArch64::STURSi:
329 case AArch64::STRSpre:
330 case AArch64::LDRSui:
331 case AArch64::LDURSi:
332 case AArch64::LDRSpre:
333 case AArch64::LDR_ZXI:
334 return Opc;
335 case AArch64::LDRSWui:
336 return AArch64::LDRWui;
337 case AArch64::LDURSWi:
338 return AArch64::LDURWi;
339 case AArch64::LDRSWpre:
340 return AArch64::LDRWpre;
341 }
342}
343
344static unsigned getMatchingWideOpcode(unsigned Opc) {
345 switch (Opc) {
346 default:
347 llvm_unreachable("Opcode has no wide equivalent!");
348 case AArch64::STRBBui:
349 return AArch64::STRHHui;
350 case AArch64::STRHHui:
351 return AArch64::STRWui;
352 case AArch64::STURBBi:
353 return AArch64::STURHHi;
354 case AArch64::STURHHi:
355 return AArch64::STURWi;
356 case AArch64::STURWi:
357 return AArch64::STURXi;
358 case AArch64::STRWui:
359 return AArch64::STRXui;
360 }
361}
362
363static unsigned getMatchingPairOpcode(unsigned Opc) {
364 switch (Opc) {
365 default:
366 llvm_unreachable("Opcode has no pairwise equivalent!");
367 case AArch64::STRSui:
368 case AArch64::STURSi:
369 return AArch64::STPSi;
370 case AArch64::STRSpre:
371 return AArch64::STPSpre;
372 case AArch64::STRDui:
373 case AArch64::STURDi:
374 return AArch64::STPDi;
375 case AArch64::STRDpre:
376 return AArch64::STPDpre;
377 case AArch64::STRQui:
378 case AArch64::STURQi:
379 case AArch64::STR_ZXI:
380 return AArch64::STPQi;
381 case AArch64::STRQpre:
382 return AArch64::STPQpre;
383 case AArch64::STRWui:
384 case AArch64::STURWi:
385 return AArch64::STPWi;
386 case AArch64::STRWpre:
387 return AArch64::STPWpre;
388 case AArch64::STRXui:
389 case AArch64::STURXi:
390 return AArch64::STPXi;
391 case AArch64::STRXpre:
392 return AArch64::STPXpre;
393 case AArch64::LDRSui:
394 case AArch64::LDURSi:
395 return AArch64::LDPSi;
396 case AArch64::LDRSpre:
397 return AArch64::LDPSpre;
398 case AArch64::LDRDui:
399 case AArch64::LDURDi:
400 return AArch64::LDPDi;
401 case AArch64::LDRDpre:
402 return AArch64::LDPDpre;
403 case AArch64::LDRQui:
404 case AArch64::LDURQi:
405 case AArch64::LDR_ZXI:
406 return AArch64::LDPQi;
407 case AArch64::LDRQpre:
408 return AArch64::LDPQpre;
409 case AArch64::LDRWui:
410 case AArch64::LDURWi:
411 return AArch64::LDPWi;
412 case AArch64::LDRWpre:
413 return AArch64::LDPWpre;
414 case AArch64::LDRXui:
415 case AArch64::LDURXi:
416 return AArch64::LDPXi;
417 case AArch64::LDRXpre:
418 return AArch64::LDPXpre;
419 case AArch64::LDRSWui:
420 case AArch64::LDURSWi:
421 return AArch64::LDPSWi;
422 case AArch64::LDRSWpre:
423 return AArch64::LDPSWpre;
424 }
425}
426
429 unsigned LdOpc = LoadInst.getOpcode();
430 unsigned StOpc = StoreInst.getOpcode();
431 switch (LdOpc) {
432 default:
433 llvm_unreachable("Unsupported load instruction!");
434 case AArch64::LDRBBui:
435 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
436 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
437 case AArch64::LDURBBi:
438 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
439 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
440 case AArch64::LDRHHui:
441 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
442 StOpc == AArch64::STRXui;
443 case AArch64::LDURHHi:
444 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
445 StOpc == AArch64::STURXi;
446 case AArch64::LDRWui:
447 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
448 case AArch64::LDURWi:
449 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
450 case AArch64::LDRXui:
451 return StOpc == AArch64::STRXui;
452 case AArch64::LDURXi:
453 return StOpc == AArch64::STURXi;
454 }
455}
456
457static unsigned getPreIndexedOpcode(unsigned Opc) {
458 // FIXME: We don't currently support creating pre-indexed loads/stores when
459 // the load or store is the unscaled version. If we decide to perform such an
460 // optimization in the future the cases for the unscaled loads/stores will
461 // need to be added here.
462 switch (Opc) {
463 default:
464 llvm_unreachable("Opcode has no pre-indexed equivalent!");
465 case AArch64::STRBui:
466 return AArch64::STRBpre;
467 case AArch64::STRHui:
468 return AArch64::STRHpre;
469 case AArch64::STRSui:
470 return AArch64::STRSpre;
471 case AArch64::STRDui:
472 return AArch64::STRDpre;
473 case AArch64::STRQui:
474 return AArch64::STRQpre;
475 case AArch64::STRBBui:
476 return AArch64::STRBBpre;
477 case AArch64::STRHHui:
478 return AArch64::STRHHpre;
479 case AArch64::STRWui:
480 return AArch64::STRWpre;
481 case AArch64::STRXui:
482 return AArch64::STRXpre;
483 case AArch64::LDRBui:
484 return AArch64::LDRBpre;
485 case AArch64::LDRHui:
486 return AArch64::LDRHpre;
487 case AArch64::LDRSui:
488 return AArch64::LDRSpre;
489 case AArch64::LDRDui:
490 return AArch64::LDRDpre;
491 case AArch64::LDRQui:
492 return AArch64::LDRQpre;
493 case AArch64::LDRBBui:
494 return AArch64::LDRBBpre;
495 case AArch64::LDRHHui:
496 return AArch64::LDRHHpre;
497 case AArch64::LDRWui:
498 return AArch64::LDRWpre;
499 case AArch64::LDRXui:
500 return AArch64::LDRXpre;
501 case AArch64::LDRSWui:
502 return AArch64::LDRSWpre;
503 case AArch64::LDPSi:
504 return AArch64::LDPSpre;
505 case AArch64::LDPSWi:
506 return AArch64::LDPSWpre;
507 case AArch64::LDPDi:
508 return AArch64::LDPDpre;
509 case AArch64::LDPQi:
510 return AArch64::LDPQpre;
511 case AArch64::LDPWi:
512 return AArch64::LDPWpre;
513 case AArch64::LDPXi:
514 return AArch64::LDPXpre;
515 case AArch64::STPSi:
516 return AArch64::STPSpre;
517 case AArch64::STPDi:
518 return AArch64::STPDpre;
519 case AArch64::STPQi:
520 return AArch64::STPQpre;
521 case AArch64::STPWi:
522 return AArch64::STPWpre;
523 case AArch64::STPXi:
524 return AArch64::STPXpre;
525 case AArch64::STGi:
526 return AArch64::STGPreIndex;
527 case AArch64::STZGi:
528 return AArch64::STZGPreIndex;
529 case AArch64::ST2Gi:
530 return AArch64::ST2GPreIndex;
531 case AArch64::STZ2Gi:
532 return AArch64::STZ2GPreIndex;
533 case AArch64::STGPi:
534 return AArch64::STGPpre;
535 }
536}
537
538static unsigned getBaseAddressOpcode(unsigned Opc) {
539 // TODO: Add more index address stores.
540 switch (Opc) {
541 default:
542 llvm_unreachable("Opcode has no base address equivalent!");
543 case AArch64::LDRBroX:
544 return AArch64::LDRBui;
545 case AArch64::LDRBBroX:
546 return AArch64::LDRBBui;
547 case AArch64::LDRSBXroX:
548 return AArch64::LDRSBXui;
549 case AArch64::LDRSBWroX:
550 return AArch64::LDRSBWui;
551 case AArch64::LDRHroX:
552 return AArch64::LDRHui;
553 case AArch64::LDRHHroX:
554 return AArch64::LDRHHui;
555 case AArch64::LDRSHXroX:
556 return AArch64::LDRSHXui;
557 case AArch64::LDRSHWroX:
558 return AArch64::LDRSHWui;
559 case AArch64::LDRWroX:
560 return AArch64::LDRWui;
561 case AArch64::LDRSroX:
562 return AArch64::LDRSui;
563 case AArch64::LDRSWroX:
564 return AArch64::LDRSWui;
565 case AArch64::LDRDroX:
566 return AArch64::LDRDui;
567 case AArch64::LDRXroX:
568 return AArch64::LDRXui;
569 case AArch64::LDRQroX:
570 return AArch64::LDRQui;
571 }
572}
573
574static unsigned getPostIndexedOpcode(unsigned Opc) {
575 switch (Opc) {
576 default:
577 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
578 case AArch64::STRBui:
579 return AArch64::STRBpost;
580 case AArch64::STRHui:
581 return AArch64::STRHpost;
582 case AArch64::STRSui:
583 case AArch64::STURSi:
584 return AArch64::STRSpost;
585 case AArch64::STRDui:
586 case AArch64::STURDi:
587 return AArch64::STRDpost;
588 case AArch64::STRQui:
589 case AArch64::STURQi:
590 return AArch64::STRQpost;
591 case AArch64::STRBBui:
592 return AArch64::STRBBpost;
593 case AArch64::STRHHui:
594 return AArch64::STRHHpost;
595 case AArch64::STRWui:
596 case AArch64::STURWi:
597 return AArch64::STRWpost;
598 case AArch64::STRXui:
599 case AArch64::STURXi:
600 return AArch64::STRXpost;
601 case AArch64::LDRBui:
602 return AArch64::LDRBpost;
603 case AArch64::LDRHui:
604 return AArch64::LDRHpost;
605 case AArch64::LDRSui:
606 case AArch64::LDURSi:
607 return AArch64::LDRSpost;
608 case AArch64::LDRDui:
609 case AArch64::LDURDi:
610 return AArch64::LDRDpost;
611 case AArch64::LDRQui:
612 case AArch64::LDURQi:
613 return AArch64::LDRQpost;
614 case AArch64::LDRBBui:
615 return AArch64::LDRBBpost;
616 case AArch64::LDRHHui:
617 return AArch64::LDRHHpost;
618 case AArch64::LDRWui:
619 case AArch64::LDURWi:
620 return AArch64::LDRWpost;
621 case AArch64::LDRXui:
622 case AArch64::LDURXi:
623 return AArch64::LDRXpost;
624 case AArch64::LDRSWui:
625 return AArch64::LDRSWpost;
626 case AArch64::LDPSi:
627 return AArch64::LDPSpost;
628 case AArch64::LDPSWi:
629 return AArch64::LDPSWpost;
630 case AArch64::LDPDi:
631 return AArch64::LDPDpost;
632 case AArch64::LDPQi:
633 return AArch64::LDPQpost;
634 case AArch64::LDPWi:
635 return AArch64::LDPWpost;
636 case AArch64::LDPXi:
637 return AArch64::LDPXpost;
638 case AArch64::STPSi:
639 return AArch64::STPSpost;
640 case AArch64::STPDi:
641 return AArch64::STPDpost;
642 case AArch64::STPQi:
643 return AArch64::STPQpost;
644 case AArch64::STPWi:
645 return AArch64::STPWpost;
646 case AArch64::STPXi:
647 return AArch64::STPXpost;
648 case AArch64::STGi:
649 return AArch64::STGPostIndex;
650 case AArch64::STZGi:
651 return AArch64::STZGPostIndex;
652 case AArch64::ST2Gi:
653 return AArch64::ST2GPostIndex;
654 case AArch64::STZ2Gi:
655 return AArch64::STZ2GPostIndex;
656 case AArch64::STGPi:
657 return AArch64::STGPpost;
658 }
659}
660
662
663 unsigned OpcA = FirstMI.getOpcode();
664 unsigned OpcB = MI.getOpcode();
665
666 switch (OpcA) {
667 default:
668 return false;
669 case AArch64::STRSpre:
670 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
671 case AArch64::STRDpre:
672 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
673 case AArch64::STRQpre:
674 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
675 case AArch64::STRWpre:
676 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
677 case AArch64::STRXpre:
678 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
679 case AArch64::LDRSpre:
680 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
681 case AArch64::LDRDpre:
682 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
683 case AArch64::LDRQpre:
684 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
685 case AArch64::LDRWpre:
686 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
687 case AArch64::LDRXpre:
688 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
689 case AArch64::LDRSWpre:
690 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
691 }
692}
693
694// Returns the scale and offset range of pre/post indexed variants of MI.
695static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
696 int &MinOffset, int &MaxOffset) {
697 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
698 bool IsTagStore = isTagStore(MI);
699 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
700 // as in the "unsigned offset" variant.
701 // All other pre/post indexed ldst instructions are unscaled.
702 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
703
704 if (IsPaired) {
705 MinOffset = -64;
706 MaxOffset = 63;
707 } else {
708 MinOffset = -256;
709 MaxOffset = 255;
710 }
711}
712
714 unsigned PairedRegOp = 0) {
715 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
716 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
717 if (IsPreLdSt)
718 PairedRegOp += 1;
719 unsigned Idx =
720 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
721 return MI.getOperand(Idx);
722}
723
726 const AArch64InstrInfo *TII) {
727 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
728 int LoadSize = TII->getMemScale(LoadInst);
729 int StoreSize = TII->getMemScale(StoreInst);
730 int UnscaledStOffset =
731 TII->hasUnscaledLdStOffset(StoreInst)
734 int UnscaledLdOffset =
735 TII->hasUnscaledLdStOffset(LoadInst)
738 return (UnscaledStOffset <= UnscaledLdOffset) &&
739 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
740}
741
743 unsigned Opc = MI.getOpcode();
744 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
745 isNarrowStore(Opc)) &&
746 getLdStRegOp(MI).getReg() == AArch64::WZR;
747}
748
750 switch (MI.getOpcode()) {
751 default:
752 return false;
753 // Scaled instructions.
754 case AArch64::LDRBBui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRWui:
757 case AArch64::LDRXui:
758 // Unscaled instructions.
759 case AArch64::LDURBBi:
760 case AArch64::LDURHHi:
761 case AArch64::LDURWi:
762 case AArch64::LDURXi:
763 return true;
764 }
765}
766
768 unsigned Opc = MI.getOpcode();
769 switch (Opc) {
770 default:
771 return false;
772 // Scaled instructions.
773 case AArch64::STRBui:
774 case AArch64::STRHui:
775 case AArch64::STRSui:
776 case AArch64::STRDui:
777 case AArch64::STRQui:
778 case AArch64::STRXui:
779 case AArch64::STRWui:
780 case AArch64::STRHHui:
781 case AArch64::STRBBui:
782 case AArch64::LDRBui:
783 case AArch64::LDRHui:
784 case AArch64::LDRSui:
785 case AArch64::LDRDui:
786 case AArch64::LDRQui:
787 case AArch64::LDRXui:
788 case AArch64::LDRWui:
789 case AArch64::LDRHHui:
790 case AArch64::LDRBBui:
791 case AArch64::STGi:
792 case AArch64::STZGi:
793 case AArch64::ST2Gi:
794 case AArch64::STZ2Gi:
795 case AArch64::STGPi:
796 // Unscaled instructions.
797 case AArch64::STURSi:
798 case AArch64::STURDi:
799 case AArch64::STURQi:
800 case AArch64::STURWi:
801 case AArch64::STURXi:
802 case AArch64::LDURSi:
803 case AArch64::LDURDi:
804 case AArch64::LDURQi:
805 case AArch64::LDURWi:
806 case AArch64::LDURXi:
807 // Paired instructions.
808 case AArch64::LDPSi:
809 case AArch64::LDPSWi:
810 case AArch64::LDPDi:
811 case AArch64::LDPQi:
812 case AArch64::LDPWi:
813 case AArch64::LDPXi:
814 case AArch64::STPSi:
815 case AArch64::STPDi:
816 case AArch64::STPQi:
817 case AArch64::STPWi:
818 case AArch64::STPXi:
819 // Make sure this is a reg+imm (as opposed to an address reloc).
821 return false;
822
823 // When using stack tagging, simple sp+imm loads and stores are not
824 // tag-checked, but pre- and post-indexed versions of them are, so we can't
825 // replace the former with the latter. This transformation would be valid
826 // if the load/store accesses an untagged stack slot, but we don't have
827 // that information available after frame indices have been eliminated.
828 if (AFI.isMTETagged() &&
829 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
830 return false;
831
832 return true;
833 }
834}
835
836// Make sure this is a reg+reg Ld/St
837static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
838 unsigned Opc = MI.getOpcode();
839 switch (Opc) {
840 default:
841 return false;
842 // Scaled instructions.
843 // TODO: Add more index address stores.
844 case AArch64::LDRBroX:
845 case AArch64::LDRBBroX:
846 case AArch64::LDRSBXroX:
847 case AArch64::LDRSBWroX:
848 Scale = 1;
849 return true;
850 case AArch64::LDRHroX:
851 case AArch64::LDRHHroX:
852 case AArch64::LDRSHXroX:
853 case AArch64::LDRSHWroX:
854 Scale = 2;
855 return true;
856 case AArch64::LDRWroX:
857 case AArch64::LDRSroX:
858 case AArch64::LDRSWroX:
859 Scale = 4;
860 return true;
861 case AArch64::LDRDroX:
862 case AArch64::LDRXroX:
863 Scale = 8;
864 return true;
865 case AArch64::LDRQroX:
866 Scale = 16;
867 return true;
868 }
869}
870
872 switch (MO.getParent()->getOpcode()) {
873 default:
874 return MO.isRenamable();
875 case AArch64::ORRWrs:
876 case AArch64::ADDWri:
877 return true;
878 }
879}
880
882AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
884 const LdStPairFlags &Flags) {
886 "Expected promotable zero stores.");
887
888 MachineBasicBlock::iterator E = I->getParent()->end();
890 // If NextI is the second of the two instructions to be merged, we need
891 // to skip one further. Either way the merge will invalidate the iterator,
892 // and we don't need to scan the new instruction, as it's a pairwise
893 // instruction, which we're not considering for further action anyway.
894 if (NextI == MergeMI)
895 NextI = next_nodbg(NextI, E);
896
897 unsigned Opc = I->getOpcode();
898 unsigned MergeMIOpc = MergeMI->getOpcode();
899 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
900 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
901 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
902 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
903
904 bool MergeForward = Flags.getMergeForward();
905 // Insert our new paired instruction after whichever of the paired
906 // instructions MergeForward indicates.
907 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
908 // Also based on MergeForward is from where we copy the base register operand
909 // so we get the flags compatible with the input code.
910 const MachineOperand &BaseRegOp =
911 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
912 : AArch64InstrInfo::getLdStBaseOp(*I);
913
914 // Which register is Rt and which is Rt2 depends on the offset order.
915 int64_t IOffsetInBytes =
916 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
917 int64_t MIOffsetInBytes =
919 MergeMIOffsetStride;
920 // Select final offset based on the offset order.
921 int64_t OffsetImm;
922 if (IOffsetInBytes > MIOffsetInBytes)
923 OffsetImm = MIOffsetInBytes;
924 else
925 OffsetImm = IOffsetInBytes;
926
927 int NewOpcode = getMatchingWideOpcode(Opc);
928 // Adjust final offset on scaled stores because the new instruction
929 // has a different scale.
930 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
931 int NewOffsetStride = TII->getMemScale(NewOpcode);
932 assert(((OffsetImm % NewOffsetStride) == 0) &&
933 "Offset should be a multiple of the store memory scale");
934 OffsetImm = OffsetImm / NewOffsetStride;
935 }
936
937 // Construct the new instruction.
938 DebugLoc DL = I->getDebugLoc();
939 MachineBasicBlock *MBB = I->getParent();
940 MachineInstrBuilder MIB;
941 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
942 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
943 .add(BaseRegOp)
944 .addImm(OffsetImm)
945 .cloneMergedMemRefs({&*I, &*MergeMI})
946 .setMIFlags(I->mergeFlagsWith(*MergeMI));
947 (void)MIB;
948
949 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
950 LLVM_DEBUG(I->print(dbgs()));
951 LLVM_DEBUG(dbgs() << " ");
952 LLVM_DEBUG(MergeMI->print(dbgs()));
953 LLVM_DEBUG(dbgs() << " with instruction:\n ");
954 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
955 LLVM_DEBUG(dbgs() << "\n");
956
957 // Erase the old instructions.
958 I->eraseFromParent();
959 MergeMI->eraseFromParent();
960 return NextI;
961}
962
963// Apply Fn to all instructions between MI and the beginning of the block, until
964// a def for DefReg is reached. Returns true, iff Fn returns true for all
965// visited instructions. Stop after visiting Limit iterations.
967 const TargetRegisterInfo *TRI, unsigned Limit,
968 std::function<bool(MachineInstr &, bool)> &Fn) {
969 auto MBB = MI.getParent();
970 for (MachineInstr &I :
971 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
972 if (!Limit)
973 return false;
974 --Limit;
975
976 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
977 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
978 TRI->regsOverlap(MOP.getReg(), DefReg);
979 });
980 if (!Fn(I, isDef))
981 return false;
982 if (isDef)
983 break;
984 }
985 return true;
986}
987
989 const TargetRegisterInfo *TRI) {
990
991 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
992 if (MOP.isReg() && MOP.isKill())
993 Units.removeReg(MOP.getReg());
994
995 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
996 if (MOP.isReg() && !MOP.isKill())
997 Units.addReg(MOP.getReg());
998}
999
1000/// This function will add a new entry into the debugValueSubstitutions table
1001/// when two instructions have been merged into a new one represented by \p
1002/// MergedInstr.
1004 unsigned InstrNumToSet,
1005 MachineInstr &OriginalInstr,
1006 MachineInstr &MergedInstr) {
1007
1008 // Figure out the Operand Index of the destination register of the
1009 // OriginalInstr in the new MergedInstr.
1010 auto Reg = OriginalInstr.getOperand(0).getReg();
1011 unsigned OperandNo = 0;
1012 bool RegFound = false;
1013 for (const auto Op : MergedInstr.operands()) {
1014 if (Op.getReg() == Reg) {
1015 RegFound = true;
1016 break;
1017 }
1018 OperandNo++;
1019 }
1020
1021 if (RegFound)
1022 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
1023 {InstrNumToSet, OperandNo});
1024}
1025
// Replaces the two load/store instructions I and Paired with a single paired
// instruction and returns NextI, the iterator from which scanning continues.
//
// Visible steps, in order:
//  1. If Flags carries a rename register, rewrite the affected operands via
//     forAllMIsUntilDef (and, in assert builds, check that the relevant
//     register is not touched between the two instructions).
//  2. Bring Paired's offset into the same scaled/unscaled form as I's.
//  3. Decide which instruction supplies Rt and which supplies Rt2.
//  4. Fix kill flags that would become stale once a store is moved.
//  5. Build the pair instruction at the insertion point chosen by
//     MergeForward.
//  6. Post-process: emit KILL + SBFMXri for a sign-extending load, switch Z
//     registers to Q registers for SVE fill/spill, and record debug value
//     substitutions.
//  7. Copy implicit defs of both originals to the new instruction and erase
//     the originals.
//
// NOTE(review): this copy of the listing is missing lines 1026, 1028, 1031,
// 1157, 1181, 1229 and 1427 (the inner numbering has gaps there). Among other
// things the declarations of Paired, NextI, Offset and Reg are not visible.
1027AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
1029 const LdStPairFlags &Flags) {
1030 MachineBasicBlock::iterator E = I->getParent()->end();
1032 // If NextI is the second of the two instructions to be merged, we need
1033 // to skip one further. Either way we merge will invalidate the iterator,
1034 // and we don't need to scan the new instruction, as it's a pairwise
1035 // instruction, which we're not considering for further action anyway.
1036 if (NextI == Paired)
1037 NextI = next_nodbg(NextI, E);
1038
1039 int SExtIdx = Flags.getSExtIdx();
 // SExtIdx == -1 means no sign extension is involved; otherwise work with the
 // non-sign-extending opcode that matches I.
1040 unsigned Opc =
1041 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1042 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
 // Distance between two adjacent elements, expressed in the units of I's
 // offset operand: the memory scale for unscaled opcodes, 1 otherwise.
1043 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1044
1045 bool MergeForward = Flags.getMergeForward();
1046
1047 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1048 if (RenameReg) {
1049 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1050 DefinedInBB.addReg(*RenameReg);
1051
1052 // Return the sub/super register for RenameReg, matching the size of
1053 // OriginalReg.
1054 auto GetMatchingSubReg =
1055 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1056 for (MCPhysReg SubOrSuper :
1057 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1058 if (C->contains(SubOrSuper))
1059 return SubOrSuper;
1060 }
1061 llvm_unreachable("Should have found matching sub or super register!");
1062 };
1063
 // Callback for forAllMIsUntilDef: rewrites operands overlapping RegToRename
 // in one instruction. It always returns true, so the walk is only stopped
 // by reaching the def or the limit.
1064 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1065 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1066 bool IsDef) {
1067 if (IsDef) {
1068 bool SeenDef = false;
1069 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1070 MachineOperand &MOP = MI.getOperand(OpIdx);
1071 // Rename the first explicit definition and all implicit
1072 // definitions matching RegToRename.
1073 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1074 (!MergeForward || !SeenDef ||
1075 (MOP.isDef() && MOP.isImplicit())) &&
1076 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1077 assert((MOP.isImplicit() ||
1078 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1079 "Need renamable operands");
1080 Register MatchingReg;
1081 if (const TargetRegisterClass *RC =
1082 MI.getRegClassConstraint(OpIdx, TII, TRI))
1083 MatchingReg = GetMatchingSubReg(RC);
1084 else {
 // No register class constraint for this operand: only implicit
 // defs that are known to be rewritable are touched.
1085 if (!isRewritableImplicitDef(MOP))
1086 continue;
1087 MatchingReg = GetMatchingSubReg(
1088 TRI->getMinimalPhysRegClass(MOP.getReg()));
1089 }
1090 MOP.setReg(MatchingReg);
1091 SeenDef = true;
1092 }
1093 }
1094 } else {
1095 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1096 MachineOperand &MOP = MI.getOperand(OpIdx);
1097 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1098 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1099 assert((MOP.isImplicit() ||
1100 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1101 "Need renamable operands");
1102 Register MatchingReg;
1103 if (const TargetRegisterClass *RC =
1104 MI.getRegClassConstraint(OpIdx, TII, TRI))
1105 MatchingReg = GetMatchingSubReg(RC);
1106 else
1107 MatchingReg = GetMatchingSubReg(
1108 TRI->getMinimalPhysRegClass(MOP.getReg()));
1109 assert(MatchingReg != AArch64::NoRegister &&
1110 "Cannot find matching regs for renaming");
1111 MOP.setReg(MatchingReg);
1112 }
1113 }
1114 }
1115 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1116 return true;
1117 };
 // Forward merge: walk back from I itself. Backward merge: walk back from
 // the instruction just before Paired. UINT32_MAX effectively disables the
 // iteration limit.
1118 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1119 TRI, UINT32_MAX, UpdateMIs);
1120
1121#if !defined(NDEBUG)
1122 // For forward merging store:
1123 // Make sure the register used for renaming is not used between the
1124 // paired instructions. That would trash the content before the new
1125 // paired instruction.
1126 MCPhysReg RegToCheck = *RenameReg;
1127 // For backward merging load:
1128 // Make sure the register being renamed is not used between the
1129 // paired instructions. That would trash the content after the new
1130 // paired instruction.
1131 if (!MergeForward)
1132 RegToCheck = RegToRename;
1133 for (auto &MI :
1134 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1135 MergeForward ? std::next(I) : I,
1136 MergeForward ? std::next(Paired) : Paired))
1137 assert(all_of(MI.operands(),
1138 [this, RegToCheck](const MachineOperand &MOP) {
1139 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1140 MOP.isUndef() ||
1141 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1142 }) &&
1143 "Rename register used between paired instruction, trashing the "
1144 "content");
1145#endif
1146 }
1147
1148 // Insert our new paired instruction after whichever of the paired
1149 // instructions MergeForward indicates.
1150 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1151 // Also based on MergeForward is from where we copy the base register operand
1152 // so we get the flags compatible with the input code.
1153 const MachineOperand &BaseRegOp =
1154 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1155 : AArch64InstrInfo::getLdStBaseOp(*I);
1156
 // NOTE(review): Offset, used further down, is declared on listing line 1157,
 // which is missing from this copy.
1158 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1159 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1160 if (IsUnscaled != PairedIsUnscaled) {
1161 // We're trying to pair instructions that differ in how they are scaled. If
1162 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1163 // the opposite (i.e., make Paired's offset unscaled).
1164 int MemSize = TII->getMemScale(*Paired);
1165 if (PairedIsUnscaled) {
1166 // If the unscaled offset isn't a multiple of the MemSize, we can't
1167 // pair the operations together.
1168 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1169 "Offset should be a multiple of the stride!");
1170 PairedOffset /= MemSize;
1171 } else {
1172 PairedOffset *= MemSize;
1173 }
1174 }
1175
1176 // Which register is Rt and which is Rt2 depends on the offset order.
1177 // However, for pre load/stores the Rt should be the one of the pre
1178 // load/store.
1179 MachineInstr *RtMI, *Rt2MI;
 // NOTE(review): the second half of this condition (listing line 1181) is
 // missing from this copy.
1180 if (Offset == PairedOffset + OffsetStride &&
1182 RtMI = &*Paired;
1183 Rt2MI = &*I;
1184 // Here we swapped the assumption made for SExtIdx.
1185 // I.e., we turn ldp I, Paired into ldp Paired, I.
1186 // Update the index accordingly.
1187 if (SExtIdx != -1)
1188 SExtIdx = (SExtIdx + 1) % 2;
1189 } else {
1190 RtMI = &*I;
1191 Rt2MI = &*Paired;
1192 }
1193 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1194 // Scale the immediate offset, if necessary.
1195 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1196 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1197 "Unscaled offset cannot be scaled.");
1198 OffsetImm /= TII->getMemScale(*RtMI);
1199 }
1200
1201 // Construct the new instruction.
1202 MachineInstrBuilder MIB;
1203 DebugLoc DL = I->getDebugLoc();
1204 MachineBasicBlock *MBB = I->getParent();
 // RegOp0/RegOp1 are copies of the original operands, so the kill-flag edits
 // below affect only what is added to the new instruction.
1205 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1206 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1207 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1208 // Kill flags may become invalid when moving stores for pairing.
1209 if (RegOp0.isUse()) {
1210 if (!MergeForward) {
1211 // Clear kill flags on store if moving upwards. Example:
1212 // STRWui kill %w0, ...
1213 // USE %w1
1214 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1215 // We are about to move the store of w1, so its kill flag may become
1216 // invalid; not the case for w0.
1217 // Since w1 is used between the stores, the kill flag on w1 is cleared
1218 // after merging.
1219 // STPWi kill %w0, %w1, ...
1220 // USE %w1
1221 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1222 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1223 PairedRegOp.setIsKill(false);
1224 } else {
1225 // Clear kill flags of the first stores register. Example:
1226 // STRWui %w1, ...
1227 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1228 // STRW %w0
 // NOTE(review): Reg is declared on listing line 1229, which is missing
 // from this copy.
1230 for (MachineInstr &MI :
1231 make_range(std::next(I->getIterator()), Paired->getIterator()))
1232 MI.clearRegisterKills(Reg, TRI);
1233 }
1234 }
1235
1236 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1237 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1238
1239 // Adds the pre-index operand for pre-indexed ld/st pairs.
1240 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1241 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1242
1243 MIB.add(RegOp0)
1244 .add(RegOp1)
1245 .add(BaseRegOp)
1246 .addImm(OffsetImm)
1247 .cloneMergedMemRefs({&*I, &*Paired})
1248 .setMIFlags(I->mergeFlagsWith(*Paired));
1249
1250 (void)MIB;
1251
1252 LLVM_DEBUG(
1253 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1254 LLVM_DEBUG(I->print(dbgs()));
1255 LLVM_DEBUG(dbgs() << " ");
1256 LLVM_DEBUG(Paired->print(dbgs()));
1257 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1258 if (SExtIdx != -1) {
1259 // Generate the sign extension for the proper result of the ldp.
1260 // I.e., with X1, that would be:
1261 // %w1 = KILL %w1, implicit-def %x1
1262 // %x1 = SBFMXri killed %x1, 0, 31
1263 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1264 // Right now, DstMO has the extended register, since it comes from an
1265 // extended opcode.
1266 Register DstRegX = DstMO.getReg();
1267 // Get the W variant of that register.
1268 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1269 // Update the result of LDP to use the W instead of the X variant.
1270 DstMO.setReg(DstRegW);
1271 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1272 LLVM_DEBUG(dbgs() << "\n");
1273 // Make the machine verifier happy by providing a definition for
1274 // the X register.
1275 // Insert this definition right after the generated LDP, i.e., before
1276 // InsertionPoint.
1277 MachineInstrBuilder MIBKill =
1278 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1279 .addReg(DstRegW)
1280 .addReg(DstRegX, RegState::Define);
1281 MIBKill->getOperand(2).setImplicit();
1282 // Create the sign extension.
1283 MachineInstrBuilder MIBSXTW =
1284 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1285 .addReg(DstRegX)
1286 .addImm(0)
1287 .addImm(31);
1288 (void)MIBSXTW;
1289
1290 // In the case of a sign-extend, where we have something like:
1291 // debugValueSubstitutions:[]
1292 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1293 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1294 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1295 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1296
1297 // It will be converted to:
1298 // debugValueSubstitutions:[]
1299 // $w0, $w1 = LDPWi $x0, 0
1300 // $w0 = KILL $w0, implicit-def $x0
1301 // $x0 = SBFMXri $x0, 0, 31
1302 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1303 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1304
1305 // We want the final result to look like:
1306 // debugValueSubstitutions:
1307 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1308 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1309 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1310 // $w0 = KILL $w0, implicit-def $x0
1311 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1312 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1313 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1314
1315 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1316 // instruction contains the final value we care about we give it a new
1317 // debug-instr-number 3. Whereas, $w1 contains the final value that we care
1318 // about, therefore the LDP instruction is also given a new
1319 // debug-instr-number 4. We have to add these substitutions to the
1320 // debugValueSubstitutions table. However, we also have to ensure that the
1321 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1322 // $w1 is the second operand of the LDP instruction.
1323
1324 if (I->peekDebugInstrNum()) {
1325 // If I is the instruction which got sign extended and has a
1326 // debug-instr-number, give the SBFMXri instruction a new
1327 // debug-instr-number, and update the debugValueSubstitutions table with
1328 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1329 // instruction a new debug-instr-number, and update the
1330 // debugValueSubstitutions table with the new debug-instr-number and
1331 // OpIndex pair.
1332 unsigned NewInstrNum;
1333 if (DstRegX == I->getOperand(0).getReg()) {
1334 NewInstrNum = MIBSXTW->getDebugInstrNum();
1335 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1336 *MIBSXTW);
1337 } else {
1338 NewInstrNum = MIB->getDebugInstrNum();
1339 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1340 }
1341 }
1342 if (Paired->peekDebugInstrNum()) {
1343 // If Paired is the instruction which got sign extended and has a
1344 // debug-instr-number, give the SBFMXri instruction a new
1345 // debug-instr-number, and update the debugValueSubstitutions table with
1346 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1347 // instruction a new debug-instr-number, and update the
1348 // debugValueSubstitutions table with the new debug-instr-number and
1349 // OpIndex pair.
1350 unsigned NewInstrNum;
1351 if (DstRegX == Paired->getOperand(0).getReg()) {
1352 NewInstrNum = MIBSXTW->getDebugInstrNum();
1353 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1354 *MIBSXTW);
1355 } else {
1356 NewInstrNum = MIB->getDebugInstrNum();
1357 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1358 *MIB);
1359 }
1360 }
1361
1362 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1363 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1364 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1365 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1366 // variant of the registers.
1367 MachineOperand &MOp0 = MIB->getOperand(0);
1368 MachineOperand &MOp1 = MIB->getOperand(1);
1369 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1370 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
 // Map Zn to Qn by index arithmetic on the register enum values.
 // NOTE(review): presumably relies on Z0.. and Q0.. being numbered
 // consecutively in the same order -- confirm.
1371 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1372 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1373 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1374 } else {
1375
1376 // In the case that the merge doesn't result in a sign-extend, if we have
1377 // something like:
1378 // debugValueSubstitutions:[]
1379 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1380 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1381 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1382 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1383
1384 // It will be converted to:
1385 // debugValueSubstitutions: []
1386 // $x0, $x1 = LDPXi $x0, 0
1387 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1388 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1389
1390 // We want the final result to look like:
1391 // debugValueSubstitutions:
1392 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1393 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1394 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1395 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1396 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1397
1398 // Here all that needs to be done is, that the LDP instruction needs to be
1399 // updated with a new debug-instr-number, we then need to add entries into
1400 // the debugSubstitutions table to map the old instr-refs to the new ones.
1401
1402 // Assign new DebugInstrNum to the Paired instruction.
1403 if (I->peekDebugInstrNum()) {
1404 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1405 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1406 *MIB);
1407 }
1408 if (Paired->peekDebugInstrNum()) {
1409 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1410 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1411 *MIB);
1412 }
1413
1414 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1415 }
1416 LLVM_DEBUG(dbgs() << "\n");
1417
 // On a forward merge, registers killed by I are recorded in DefinedInBB.
1418 if (MergeForward)
1419 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1420 if (MOP.isReg() && MOP.isKill())
1421 DefinedInBB.addReg(MOP.getReg());
1422
1423 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1424 // only copies implicit defs and makes sure that each operand is only added
1425 // once in case of duplicates.
 // NOTE(review): the second lambda parameter (MI2) is declared on listing
 // line 1427, which is missing from this copy.
1426 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1428 SmallSetVector<Register, 4> Ops;
1429 for (const MachineOperand &MO :
1430 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1431 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1432 Ops.insert(MO.getReg());
1433 for (const MachineOperand &MO :
1434 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1435 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1436 Ops.insert(MO.getReg());
1437 for (auto Op : Ops)
1438 MIB.addDef(Op, RegState::Implicit);
1439 };
1440 CopyImplicitOps(I, Paired);
1441
1442 // Erase the old instructions.
1443 I->eraseFromParent();
1444 Paired->eraseFromParent();
1445
1446 return NextI;
1447}
1448
// Replaces the load LoadI with a register operation that takes its value from
// the register stored by StoreI, and returns NextI.
//
// Cases visible in the body:
//  * same size (4 or 8 bytes), same register, 8-byte access: the load is
//    simply erased;
//  * same size otherwise: the load becomes an ORR with the zero register;
//  * load no wider than the store (little-endian only): the load becomes an
//    AND with an encoded mask when both offsets are equal, otherwise a UBFM
//    bitfield extract.
// In every case at most one kill flag on the stored register between the
// store and the new instruction is cleared, because the register is now read
// later than before.
//
// NOTE(review): this copy of the listing is missing lines 1449, 1451, 1452,
// 1490, 1503 and 1507 (gaps in the inner numbering): the return type, the
// StoreI parameter, the NextI declaration, one builder call and the
// "unscaled" arm of both offset computations are not visible.
1450AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1453 next_nodbg(LoadI, LoadI->getParent()->end());
1454
1455 int LoadSize = TII->getMemScale(*LoadI);
1456 int StoreSize = TII->getMemScale(*StoreI);
1457 Register LdRt = getLdStRegOp(*LoadI).getReg();
1458 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1459 Register StRt = getLdStRegOp(*StoreI).getReg();
1460 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1461
1462 assert((IsStoreXReg ||
1463 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1464 "Unexpected RegClass");
1465
1466 MachineInstr *BitExtMI;
1467 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1468 // Remove the load, if the destination register of the loads is the same
1469 // register for stored value.
1470 if (StRt == LdRt && LoadSize == 8) {
 // Only the first kill of StRt found in [StoreI, LoadI) is cleared.
1471 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1472 LoadI->getIterator())) {
1473 if (MI.killsRegister(StRt, TRI)) {
1474 MI.clearRegisterKills(StRt, TRI);
1475 break;
1476 }
1477 }
1478 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1479 LLVM_DEBUG(LoadI->print(dbgs()));
1480 LLVM_DEBUG(dbgs() << "\n");
1481 LoadI->eraseFromParent();
1482 return NextI;
1483 }
1484 // Replace the load with a mov if the load and store are in the same size.
1485 BitExtMI =
1486 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1487 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1488 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1489 .add(StMO)
1491 .setMIFlags(LoadI->getFlags());
1492 } else {
1493 // FIXME: Currently we disable this transformation in big-endian targets as
1494 // performance and correctness are verified only in little-endian.
1495 if (!Subtarget->isLittleEndian())
1496 return NextI;
1497 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1498 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1499 "Unsupported ld/st match");
1500 assert(LoadSize <= StoreSize && "Invalid load size");
 // Both offsets are brought to byte units; the visible (scaled) arm
 // multiplies the immediate by the access size.
1501 int UnscaledLdOffset =
1502 IsUnscaled
1504 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1505 int UnscaledStOffset =
1506 IsUnscaled
1508 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1509 int Width = LoadSize * 8;
1510 Register DestReg =
1511 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1512 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1513 : LdRt;
1514
1515 assert((UnscaledLdOffset >= UnscaledStOffset &&
1516 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1517 "Invalid offset");
1518
 // Immr is the bit position of the loaded bytes inside the stored value,
 // Imms the position of their last bit.
1519 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1520 int Imms = Immr + Width - 1;
1521 if (UnscaledLdOffset == UnscaledStOffset) {
 // Here Immr is 0, so the mask keeps the low Width bits.
1522 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1523 | ((Immr) << 6) // immr
1524 | ((Imms) << 0) // imms
1525 ;
1526
1527 BitExtMI =
1528 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1530 DestReg)
1531 .add(StMO)
1532 .addImm(AndMaskEncoded)
1533 .setMIFlags(LoadI->getFlags());
1534 } else if (IsStoreXReg && Imms == 31) {
1535 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1536 // instruction.
1537 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1538 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1539 TII->get(AArch64::UBFMWri),
1540 TRI->getSubReg(DestReg, AArch64::sub_32))
1541 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1542 .addImm(Immr)
1543 .addImm(Imms)
1544 .setMIFlags(LoadI->getFlags());
1545 } else {
1546 BitExtMI =
1547 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1548 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1549 DestReg)
1550 .add(StMO)
1551 .addImm(Immr)
1552 .addImm(Imms)
1553 .setMIFlags(LoadI->getFlags());
1554 }
1555 }
1556
1557 // Clear kill flags between store and load.
1558 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1559 BitExtMI->getIterator()))
1560 if (MI.killsRegister(StRt, TRI)) {
1561 MI.clearRegisterKills(StRt, TRI);
1562 break;
1563 }
1564
1565 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1566 LLVM_DEBUG(StoreI->print(dbgs()));
1567 LLVM_DEBUG(dbgs() << " ");
1568 LLVM_DEBUG(LoadI->print(dbgs()));
1569 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1570 LLVM_DEBUG(StoreI->print(dbgs()));
1571 LLVM_DEBUG(dbgs() << " ");
1572 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1573 LLVM_DEBUG(dbgs() << "\n");
1574
1575 // Erase the old instructions.
1576 LoadI->eraseFromParent();
1577 return NextI;
1578}
1579
/// Returns true if \p Offset fits the element-offset range accepted for a
/// paired load/store, i.e. [-64, 63].
///
/// \param IsUnscaled   True if \p Offset is a byte offset (unscaled form). It
///                     is then converted to an element offset by dividing by
///                     \p OffsetStride; a byte offset that is not a multiple
///                     of the stride is rejected.
/// \param Offset       The offset to test; already an element offset when
///                     \p IsUnscaled is false.
/// \param OffsetStride Size of one element in bytes. Only consulted when
///                     \p IsUnscaled is true, where it must be non-zero.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Inclusive bounds of the element offset (the range of a signed 7-bit
  // value).
  constexpr int MinPairOffset = -64;
  constexpr int MaxPairOffset = 63;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // A zero stride would make the modulo and the division below undefined.
    assert(OffsetStride != 0 && "Unscaled offset needs a non-zero stride");
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride != 0)
      return false;
    Offset /= OffsetStride;
  }
  return Offset >= MinPairOffset && Offset <= MaxPairOffset;
}
1592
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
//
// Rounds Num up to the nearest multiple of PowOf2 (towards positive infinity,
// so negative values move towards zero). PowOf2 must be a positive power of
// two: the mask arithmetic below yields a meaningless result otherwise, which
// is why the precondition is asserted.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  assert(PowOf2 > 0 && (PowOf2 & (PowOf2 - 1)) == 0 &&
         "alignTo requires a positive power-of-two alignment");
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
1600
// Returns true if MIa may alias at least one instruction in MemInsns, using
// AA with TBAA disabled. The first aliasing instruction found is dumped in
// debug output and ends the search.
//
// NOTE(review): the listing numbering jumps from 1601 to 1603, so the
// declaration of the MemInsns parameter is missing from this copy.
1601static bool mayAlias(MachineInstr &MIa,
1603 AliasAnalysis *AA) {
1604 for (MachineInstr *MIb : MemInsns) {
1605 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1606 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1607 return true;
1608 }
1609 }
1610
1611 LLVM_DEBUG(dbgs() << "No aliases found\n");
1612 return false;
1613}
1614
// Scans backwards from the load I for a store whose value can be forwarded to
// the load. On success StoreI is set to that store and true is returned.
//
// The scan gives up (returns false) when it meets a call, when the base
// register has been modified, when it meets a store that may alias the load,
// when the start of the block is reached, or once Limit non-transient
// instructions have been looked at.
//
// NOTE(review): this copy of the listing is missing lines 1617, 1619, 1621,
// 1650 and 1651 (gaps in the inner numbering): the remaining parameter(s),
// the declarations of MBBI and BaseReg, and part of the match condition are
// not visible.
1615bool AArch64LoadStoreOpt::findMatchingStore(
1616 MachineBasicBlock::iterator I, unsigned Limit,
1618 MachineBasicBlock::iterator B = I->getParent()->begin();
1620 MachineInstr &LoadMI = *I;
1622
1623 // If the load is the first instruction in the block, there's obviously
1624 // not any matching store.
1625 if (MBBI == B)
1626 return false;
1627
1628 // Track which register units have been modified and used between the first
1629 // insn and the second insn.
1630 ModifiedRegUnits.clear();
1631 UsedRegUnits.clear();
1632
1633 unsigned Count = 0;
1634 do {
1635 MBBI = prev_nodbg(MBBI, B);
1636 MachineInstr &MI = *MBBI;
1637
1638 // Don't count transient instructions towards the search limit since there
1639 // may be different numbers of them if e.g. debug information is present.
1640 if (!MI.isTransient())
1641 ++Count;
1642
1643 // If the load instruction reads directly from the address to which the
1644 // store instruction writes and the stored value is not modified, we can
1645 // promote the load. Since we do not handle stores with pre-/post-index,
1646 // it's unnecessary to check if BaseReg is modified by the store itself.
1647 // Also we can't handle stores without an immediate offset operand,
1648 // while the operand might be the address for a global variable.
1649 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1652 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1653 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1654 StoreI = MBBI;
1655 return true;
1656 }
1657
1658 if (MI.isCall())
1659 return false;
1660
1661 // Update modified / uses register units.
1662 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1663
1664 // Otherwise, if the base register is modified, we have no match, so
1665 // return early.
1666 if (!ModifiedRegUnits.available(BaseReg))
1667 return false;
1668
1669 // If we encounter a store aliased with the load, return early.
1670 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1671 return false;
1672 } while (MBBI != B && Count < Limit);
1673 return false;
1674}
1675
// Tells whether Windows CFI matters for MF. The visible part of the condition
// requires the target's MCAsmInfo to report usesWindowsCFI().
//
// NOTE(review): the listing numbering jumps from 1677 to 1679, so the second
// operand of the && (and the end of the return statement) is missing from
// this copy.
1676static bool needsWinCFI(const MachineFunction *MF) {
1677 return MF->getTarget().getMCAsmInfo().usesWindowsCFI() &&
1679}
1680
1681// Returns true if FirstMI and MI are candidates for merging or pairing.
1682// Otherwise, returns false.
//
// MI is rejected if it has an ordered memory reference or pairing is
// suppressed for it. Identical opcodes are accepted unless FirstMI is a
// pre-indexed load/store. Different opcodes are accepted only in the specific
// combinations handled below (sign-extending vs. non-extending form, narrow
// zero stores of equal size, pre-indexed + unsigned-immediate form, scaled
// vs. unscaled form). When a sign-extending and a non-extending opcode are
// matched, Flags receives the index of the sign-extended instruction.
//
// NOTE(review): this copy of the listing is missing lines 1683, 1696, 1713
// and 1747 (gaps in the inner numbering): the first signature line and parts
// of three conditions are not visible.
1684 LdStPairFlags &Flags,
1685 const AArch64InstrInfo *TII) {
1686 // If this is volatile or if pairing is suppressed, not a candidate.
1687 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1688 return false;
1689
1690 // We should have already checked FirstMI for pair suppression and volatility.
1691 assert(!FirstMI.hasOrderedMemoryRef() &&
1692 !TII->isLdStPairSuppressed(FirstMI) &&
1693 "FirstMI shouldn't get here if either of these checks are true.");
1694
1695 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1697 return false;
1698
1699 unsigned OpcA = FirstMI.getOpcode();
1700 unsigned OpcB = MI.getOpcode();
1701
1702 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1703 if (OpcA == OpcB)
1704 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1705
1706 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1707 // allow pairing them with other instructions.
1708 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1709 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1710 return false;
1711
1712 // Two pre ld/st of different opcodes cannot be merged either
1714 return false;
1715
1716 // Try to match a sign-extended load/store with a zero-extended load/store.
1717 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1718 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1719 assert(IsValidLdStrOpc &&
1720 "Given Opc should be a Load or Store with an immediate");
1721 // OpcA will be the first instruction in the pair.
1722 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
 // If OpcA already is the non-extending form, the sign-extending one must be
 // the second instruction (index 1); otherwise it is the first (index 0).
1723 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1724 return true;
1725 }
1726
1727 // If the second instruction isn't even a mergable/pairable load/store, bail
1728 // out.
1729 if (!PairIsValidLdStrOpc)
1730 return false;
1731
1732 // Narrow stores do not have a matching pair opcodes, so constrain their
1733 // merging to zero stores.
1734 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1735 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1736 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1737 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1738
1739 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1740 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1741 // are candidate pairs that can be merged.
1742 if (isPreLdStPairCandidate(FirstMI, MI))
1743 return true;
1744
1745 // Try to match an unscaled load/store with a scaled load/store.
1746 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1748
1749 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1750}
1751
1752static bool canRenameMOP(const MachineOperand &MOP,
1753 const TargetRegisterInfo *TRI) {
1754 if (MOP.isReg()) {
1755 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1756 // Renaming registers with multiple disjunct sub-registers (e.g. the
1757 // result of a LD3) means that all sub-registers are renamed, potentially
1758 // impacting other instructions we did not check. Bail out.
1759 // Note that this relies on the structure of the AArch64 register file. In
1760 // particular, a subregister cannot be written without overwriting the
1761 // whole register.
1762 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1763 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1764 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1765 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1766 LLVM_DEBUG(
1767 dbgs()
1768 << " Cannot rename operands with multiple disjunct subregisters ("
1769 << MOP << ")\n");
1770 return false;
1771 }
1772
1773 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1774 // them must be known. For example, in ORRWrs the implicit-def
1775 // corresponds to the result register.
1776 if (MOP.isImplicit() && MOP.isDef()) {
1777 if (!isRewritableImplicitDef(MOP))
1778 return false;
1779 return TRI->isSuperOrSubRegisterEq(
1780 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1781 }
1782 }
1783 return MOP.isImplicit() ||
1784 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1785}
1786
// Checks whether the register stored by FirstMI can be renamed in every
// instruction from FirstMI back to the previous definition of that register.
//
// Requirements visible in the body: FirstMI must be a store; the stored
// register (or an overlapping implicit operand) must be killed at FirstMI; no
// visited instruction may carry the FrameSetup flag; the defining instruction
// must not be a pseudo; every overlapping operand must pass canRenameMOP; and
// a definition must be found within LdStLimit instructions inside the block.
// While walking, the registers touched are accumulated into UsedInBetween and
// the minimal register classes of the renamed operands into RequiredClasses.
//
// NOTE(review): the listing numbering jumps from 1787 to 1790, so the function
// name and all parameters except TRI are missing from this copy.
1787static bool
1790 const TargetRegisterInfo *TRI) {
1791 if (!FirstMI.mayStore())
1792 return false;
1793
1794 // Check if we can find an unused register which we can use to rename
1795 // the register used by the first load/store.
1796
1797 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1798 // For now, we only rename if the store operand gets killed at the store.
1799 if (!getLdStRegOp(FirstMI).isKill() &&
1800 !any_of(FirstMI.operands(),
1801 [TRI, RegToRename](const MachineOperand &MOP) {
1802 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1803 MOP.isImplicit() && MOP.isKill() &&
1804 TRI->regsOverlap(RegToRename, MOP.getReg());
1805 })) {
1806 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1807 return false;
1808 }
1809
 // Set by the callback below; stays false if the walk ends at the start of
 // the block without meeting a definition.
1810 bool FoundDef = false;
1811
1812 // For each instruction between FirstMI and the previous def for RegToRename,
1813 // we
1814 // * check if we can rename RegToRename in this instruction
1815 // * collect the registers used and required register classes for RegToRename.
1816 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1817 bool IsDef) {
1818 LLVM_DEBUG(dbgs() << "Checking " << MI);
1819 // Currently we do not try to rename across frame-setup instructions.
1820 if (MI.getFlag(MachineInstr::FrameSetup)) {
1821 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1822 << "currently\n");
1823 return false;
1824 }
1825
1826 UsedInBetween.accumulate(MI);
1827
1828 // For a definition, check that we can rename the definition and exit the
1829 // loop.
1830 FoundDef = IsDef;
1831
1832 // For defs, check if we can rename the first def of RegToRename.
1833 if (FoundDef) {
1834 // For some pseudo instructions, we might not generate code in the end
1835 // (e.g. KILL) and we would end up without a correct def for the rename
1836 // register.
1837 // TODO: This might be overly conservative and we could handle those cases
1838 // in multiple ways:
1839 // 1. Insert an extra copy, to materialize the def.
1840 // 2. Skip pseudo-defs until we find an non-pseudo def.
1841 if (MI.isPseudo()) {
1842 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1843 return false;
1844 }
1845
 // On the defining instruction only def operands are checked.
1846 for (auto &MOP : MI.operands()) {
1847 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1848 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1849 continue;
1850 if (!canRenameMOP(MOP, TRI)) {
1851 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1852 return false;
1853 }
1854 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1855 }
1856 return true;
1857 } else {
 // On every other instruction all overlapping register operands (uses
 // and defs) are checked.
1858 for (auto &MOP : MI.operands()) {
1859 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1860 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1861 continue;
1862
1863 if (!canRenameMOP(MOP, TRI)) {
1864 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1865 return false;
1866 }
1867 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1868 }
1869 }
1870 return true;
1871 };
1872
1873 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1874 return false;
1875
1876 if (!FoundDef) {
1877 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1878 return false;
1879 }
1880 return true;
1881}
1882
1883// We want to merge the second load into the first by rewriting the usages of
1884// the same reg between first (incl.) and second (excl.). We don't need to care
1885// about any insns before FirstLoad or after SecondLoad.
1886// 1. The second load writes a new value into the same reg.
1887// - The renaming cannot affect later uses of the reg.
1888// - The second load always trashes the value written by the first load,
1889// which means the reg must be killed before the second load.
1890// 2. The first load must be a def for the same reg so we don't need to look
1891// into anything before it.
//
// Returns false if FirstLoad is a pseudo instruction, if any instruction in
// [FirstLoad, SecondLoad) carries the FrameSetup flag, or if any register
// operand in that range that overlaps FirstLoad's load/store register is
// rejected by canRenameMOP. Returns true otherwise.
// Side effects: FirstLoad is accumulated into UsedInBetween, and the minimal
// physical register class of every overlapping operand that passed the check
// is inserted into RequiredClasses.
//
// NOTE(review): the listing numbering skips 1892 and 1895, so the line with
// the return type / function name and the declaration of the RequiredClasses
// parameter are not visible in this extract.
1893 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1894 LiveRegUnits &UsedInBetween,
1896 const TargetRegisterInfo *TRI) {
1897 if (FirstLoad.isPseudo())
1898 return false;
1899
1900 UsedInBetween.accumulate(FirstLoad);
1901 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
 // The range is half-open: FirstLoad itself is checked, SecondLoad is not.
1902 bool Success = std::all_of(
1903 FirstLoad.getIterator(), SecondLoad.getIterator(),
1904 [&](MachineInstr &MI) {
1905 LLVM_DEBUG(dbgs() << "Checking " << MI);
1906 // Currently we do not try to rename across frame-setup instructions.
1907 if (MI.getFlag(MachineInstr::FrameSetup)) {
1908 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1909 << "currently\n");
1910 return false;
1911 }
1912
 // Only non-debug register operands that overlap RegToRename matter; each
 // of them must be renamable and contributes its register class.
1913 for (auto &MOP : MI.operands()) {
1914 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1915 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1916 continue;
1917 if (!canRenameMOP(MOP, TRI)) {
1918 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1919 return false;
1920 }
1921 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1922 }
1923
1924 return true;
1925 });
1926 return Success;
1927}
1928
1929// Check if we can find a physical register for renaming \p Reg. This register
1930// must:
1931// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1932// defined registers up to the point where the renamed register will be used,
1933// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
1934// registers in the range the rename register will be used,
1935// * be available in all used register classes (checked using RequiredClasses).
//
// In addition, the code below rejects candidates that are reserved or that
// have any sub- or super-register (including themselves) that is callee saved.
// Candidates are taken from the minimal physical register class of \p Reg in
// that class's iteration order; the first acceptable one is returned and, as a
// side effect, added to \p DefinedInBB. Returns std::nullopt if none qualifies.
//
// NOTE(review): the listing numbering skips 1939 and 1941, so the declaration
// of the RequiredClasses parameter and of the local RegInfo used below are not
// visible in this extract.
1936static std::optional<MCPhysReg> tryToFindRegisterToRename(
1937 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1938 LiveRegUnits &UsedInBetween,
1940 const TargetRegisterInfo *TRI) {
1942
1943 // Checks if any sub- or super-register of PR is callee saved.
1944 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1945 return any_of(TRI->sub_and_superregs_inclusive(PR),
1946 [&MF, TRI](MCPhysReg SubOrSuper) {
1947 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1948 });
1949 };
1950
1951 // Check if PR or one of its sub- or super-registers can be used for all
1952 // required register classes.
1953 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1954 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1955 return any_of(
1956 TRI->sub_and_superregs_inclusive(PR),
1957 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1958 });
1959 };
1960
1961 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1962 for (const MCPhysReg &PR : *RegClass) {
1963 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1964 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1965 CanBeUsedForAllClasses(PR)) {
 // Record the pick in DefinedInBB so that it is no longer reported as
 // available by later queries on the same set.
1966 DefinedInBB.addReg(PR);
1967 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1968 << "\n");
1969 return {PR};
1970 }
1971 }
1972 LLVM_DEBUG(dbgs() << "No rename register found from "
1973 << TRI->getRegClassName(RegClass) << "\n");
1974 return std::nullopt;
1975}
1976
1977// For store pairs: returns a register from FirstMI to the beginning of the
1978// block that can be renamed.
1979// For load pairs: returns a register from FirstMI to MI that can be renamed.
//
// Returns an empty optional when the RegRenamingCounter debug counter says not
// to execute, when FirstMI's load/store register has no minimal physical
// register class, when the function does not track liveness, when the
// renaming check fails, or when no suitable rename register is found.
//
// \p MaybeCanRename is a cached answer of the renaming check; if it has no
// value the check is run here (canRenameUntilSecondLoad for loads,
// canRenameUpToDef for stores). The parameter is taken by value, so the
// result computed here is not written back to the caller's variable.
//
// NOTE(review): the listing numbering skips 1983, so the declaration of the
// RequiredClasses parameter is not visible in this extract.
1980static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1981 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1982 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1984 const TargetRegisterInfo *TRI) {
1985 std::optional<MCPhysReg> RenameReg;
1986 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1987 return RenameReg;
1988
 // RegClass is only used as a null check here; the actual candidate search
 // happens in tryToFindRegisterToRename.
1989 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1990 MachineFunction &MF = *FirstMI.getParent()->getParent();
1991 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1992 return RenameReg;
1993
1994 const bool IsLoad = FirstMI.mayLoad();
1995
1996 if (!MaybeCanRename) {
1997 if (IsLoad)
1998 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1999 RequiredClasses, TRI)};
2000 else
2001 MaybeCanRename = {
2002 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
2003 }
2004
2005 if (*MaybeCanRename) {
2006 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
2007 RequiredClasses, TRI);
2008 }
2009 return RenameReg;
2010}
2011
2012/// Scan the instructions looking for a load/store that can be combined with the
2013/// current instruction into a wider equivalent or a load/store pair.
///
/// \param I the first load/store; the scan starts at the next non-debug
/// instruction after it and stays within I's basic block.
/// \param Flags updated during the scan: the sign-extend index is reset for
/// every candidate, and on a successful return the merge direction
/// (setMergeForward) and the rename register (set or cleared) are filled in.
/// \param Limit maximum number of non-transient instructions to examine.
/// \param FindNarrowMerge selects the narrow-merge offset checks instead of
/// the pair-immediate range checks.
/// \returns an iterator to the matching instruction, or the end iterator of
/// the block (E) if no match was found or the scan had to stop.
///
/// NOTE(review): this extract omits listing lines 2014, 2019, 2026-2027, 2064,
/// 2072-2073, 2105, 2107, 2113, 2164 and 2181. As a result the return type,
/// the declarations of MBBI, BaseReg, Offset, MIBaseReg and MIOffset, the
/// second half of the candidate condition, and a few call arguments are not
/// visible here.
2015AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
2016 LdStPairFlags &Flags, unsigned Limit,
2017 bool FindNarrowMerge) {
2018 MachineBasicBlock::iterator E = I->getParent()->end();
2020 MachineInstr &FirstMI = *I;
2021 MBBI = next_nodbg(MBBI, E);
2022
2023 bool MayLoad = FirstMI.mayLoad();
2024 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
2025 Register Reg = getLdStRegOp(FirstMI).getReg();
 // For unscaled instructions adjacent accesses differ by the access size,
 // otherwise by one (scaled) unit.
2028 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
2029 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
2030
 // An empty optional means "not computed yet"; when renaming is disabled the
 // answer is pinned to false up front.
2031 std::optional<bool> MaybeCanRename;
2032 if (!EnableRenaming)
2033 MaybeCanRename = {false};
2034
2035 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2036 LiveRegUnits UsedInBetween;
2037 UsedInBetween.init(*TRI);
2038
2039 Flags.clearRenameReg();
2040
2041 // Track which register units have been modified and used between the first
2042 // insn (inclusive) and the second insn.
2043 ModifiedRegUnits.clear();
2044 UsedRegUnits.clear();
2045
2046 // Remember any instructions that read/write memory between FirstMI and MI.
2047 SmallVector<MachineInstr *, 4> MemInsns;
2048
2049 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2050 for (unsigned Count = 0; MBBI != E && Count < Limit;
2051 MBBI = next_nodbg(MBBI, E)) {
2052 MachineInstr &MI = *MBBI;
2053 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2054
2055 UsedInBetween.accumulate(MI);
2056
2057 // Don't count transient instructions towards the search limit since there
2058 // may be different numbers of them if e.g. debug information is present.
2059 if (!MI.isTransient())
2060 ++Count;
2061
2062 Flags.setSExtIdx(-1);
2063 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2065 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2066 // If we've found another instruction with the same opcode, check to see
2067 // if the base and offset are compatible with our starting instruction.
2068 // These instructions all have scaled immediate operands, so we just
2069 // check for +1/-1. Make sure to check the new instruction offset is
2070 // actually an immediate and not a symbolic reference destined for
2071 // a relocation.
2074 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2075 if (IsUnscaled != MIIsUnscaled) {
2076 // We're trying to pair instructions that differ in how they are scaled.
2077 // If FirstMI is scaled then scale the offset of MI accordingly.
2078 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2079 int MemSize = TII->getMemScale(MI);
2080 if (MIIsUnscaled) {
2081 // If the unscaled offset isn't a multiple of the MemSize, we can't
2082 // pair the operations together: bail and keep looking.
2083 if (MIOffset % MemSize) {
2084 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2085 UsedRegUnits, TRI);
2086 MemInsns.push_back(&MI);
2087 continue;
2088 }
2089 MIOffset /= MemSize;
2090 } else {
2091 MIOffset *= MemSize;
2092 }
2093 }
2094
2095 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2096
2097 if (BaseReg == MIBaseReg) {
2098 // If the offset of the second ld/st is not equal to the size of the
2099 // destination register it can't be paired with a pre-index ld/st
2100 // pair. Additionally if the base reg is used or modified the operations
2101 // can't be paired: bail and keep looking.
2102 if (IsPreLdSt) {
2103 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2104 bool IsBaseRegUsed = !UsedRegUnits.available(
2106 bool IsBaseRegModified = !ModifiedRegUnits.available(
2108 // If the stored value and the address of the second instruction is
2109 // the same, it needs to be using the updated register and therefore
2110 // it must not be folded.
2111 bool IsMIRegTheSame =
2112 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2114 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2115 IsMIRegTheSame) {
2116 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2117 UsedRegUnits, TRI);
2118 MemInsns.push_back(&MI);
2119 continue;
2120 }
2121 } else {
 // Not adjacent in either direction: record MI as an intervening
 // memory instruction and keep looking.
2122 if ((Offset != MIOffset + OffsetStride) &&
2123 (Offset + OffsetStride != MIOffset)) {
2124 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2125 UsedRegUnits, TRI);
2126 MemInsns.push_back(&MI);
2127 continue;
2128 }
2129 }
2130
2131 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2132 if (FindNarrowMerge) {
2133 // If the alignment requirements of the scaled wide load/store
2134 // instruction can't express the offset of the scaled narrow input,
2135 // bail and keep looking. For promotable zero stores, allow only when
2136 // the stored value is the same (i.e., WZR).
2137 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2138 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2139 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2140 UsedRegUnits, TRI);
2141 MemInsns.push_back(&MI);
2142 continue;
2143 }
2144 } else {
2145 // Pairwise instructions have a 7-bit signed offset field. Single
2146 // insns have a 12-bit unsigned offset field. If the resultant
2147 // immediate offset of merging these instructions is out of range for
2148 // a pairwise instruction, bail and keep looking.
2149 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2150 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2151 UsedRegUnits, TRI);
2152 MemInsns.push_back(&MI);
2153 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2154 << "keep looking.\n");
2155 continue;
2156 }
2157 // If the alignment requirements of the paired (scaled) instruction
2158 // can't express the offset of the unscaled input, bail and keep
2159 // looking.
2160 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2161 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2162 UsedRegUnits, TRI);
2163 MemInsns.push_back(&MI);
2165 << "Offset doesn't fit due to alignment requirements, "
2166 << "keep looking.\n");
2167 continue;
2168 }
2169 }
2170
2171 // If the BaseReg has been modified, then we cannot do the optimization.
2172 // For example, in the following pattern
2173 // ldr x1 [x2]
2174 // ldr x2 [x3]
2175 // ldr x4 [x2, #8],
2176 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2177 if (!ModifiedRegUnits.available(BaseReg))
2178 return E;
2179
2180 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2182
2183 // If the Rt of the second instruction (destination register of the
2184 // load) was not modified or used between the two instructions and none
2185 // of the instructions between the second and first alias with the
2186 // second, we can combine the second into the first.
2187 bool RtNotModified =
2188 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
 // A use of MI's Rt in between only blocks the merge for loads that do
 // not share the load register with FirstMI.
2189 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2190 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2191
2192 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2193 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2194 << (RtNotModified ? "true" : "false") << "\n"
2195 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2196 << (RtNotUsed ? "true" : "false") << "\n");
2197
2198 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2199 // For pairs loading into the same reg, try to find a renaming
2200 // opportunity to allow the renaming of Reg between FirstMI and MI
2201 // and combine MI into FirstMI; otherwise bail and keep looking.
2202 if (SameLoadReg) {
2203 std::optional<MCPhysReg> RenameReg =
2204 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2205 Reg, DefinedInBB, UsedInBetween,
2206 RequiredClasses, TRI);
2207 if (!RenameReg) {
2208 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2209 UsedRegUnits, TRI);
2210 MemInsns.push_back(&MI);
2211 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2212 << "keep looking.\n");
2213 continue;
2214 }
2215 Flags.setRenameReg(*RenameReg);
2216 }
2217
2218 Flags.setMergeForward(false);
2219 if (!SameLoadReg)
2220 Flags.clearRenameReg();
2221 return MBBI;
2222 }
2223
2224 // Likewise, if the Rt of the first instruction is not modified or used
2225 // between the two instructions and none of the instructions between the
2226 // first and the second alias with the first, we can combine the first
2227 // into the second.
 // Note: RtNotModified is reused here, but for loads it is now computed
 // from UsedRegUnits (was FirstMI's Rt read in between?).
2228 RtNotModified = !(
2229 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2230
2231 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2232 << "Reg '" << getLdStRegOp(FirstMI)
2233 << "' not modified: "
2234 << (RtNotModified ? "true" : "false") << "\n");
2235
2236 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
 // If FirstMI's Rt was not modified in between, merge forward without
 // renaming; otherwise merging forward needs a rename register.
2237 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2238 Flags.setMergeForward(true);
2239 Flags.clearRenameReg();
2240 return MBBI;
2241 }
2242
2243 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2244 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2245 RequiredClasses, TRI);
2246 if (RenameReg) {
2247 Flags.setMergeForward(true);
2248 Flags.setRenameReg(*RenameReg);
2249 return MBBI;
2250 }
2251 }
2252 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2253 << "interference in between, keep looking.\n");
2254 }
2255 }
2256
2257 // The instruction wasn't a matching load or store. Stop searching if we
2258 // encounter a call instruction that might modify memory.
2259 if (MI.isCall()) {
2260 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2261 return E;
2262 }
2263
2264 // Update modified / uses register units.
2265 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2266
2267 // Otherwise, if the base register is modified, we have no match, so
2268 // return early.
2269 if (!ModifiedRegUnits.available(BaseReg)) {
2270 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2271 return E;
2272 }
2273
2274 // Update list of instructions that read/write memory.
2275 if (MI.mayLoadOrStore())
2276 MemInsns.push_back(&MI);
2277 }
2278 return E;
2279}
2280
// Given a register update instruction MI (asserted to be SUBXri or ADDXri) and
// a candidate iterator MaybeCFI, returns MaybeCFI only if it points at a
// CFI_INSTRUCTION, MI carries the FrameSetup or FrameDestroy flag, MI's
// operand 0 is SP, and the CFI operation is one of the kinds accepted by the
// switch below. In every other case the block's end iterator is returned.
//
// NOTE(review): the listing numbering skips 2281-2282 and 2298-2299, so the
// function header and the case labels of the switch are not visible in this
// extract; the call site in this file refers to this function as maybeMoveCFI.
2283 assert((MI.getOpcode() == AArch64::SUBXri ||
2284 MI.getOpcode() == AArch64::ADDXri) &&
2285 "Expected a register update instruction");
2286 auto End = MI.getParent()->end();
2287 if (MaybeCFI == End ||
2288 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2289 !(MI.getFlag(MachineInstr::FrameSetup) ||
2290 MI.getFlag(MachineInstr::FrameDestroy)) ||
2291 MI.getOperand(0).getReg() != AArch64::SP)
2292 return End;
2293
 // Look up the CFI directive referenced by the CFI_INSTRUCTION's index operand.
2294 const MachineFunction &MF = *MI.getParent()->getParent();
2295 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2296 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2297 switch (CFI.getOperation()) {
2300 return MaybeCFI;
2301 default:
2302 return End;
2303 }
2304}
2305
// Folds the base-register update instruction Update (asserted to be ADDXri or
// SUBXri) into the load/store I, producing a pre-indexed (IsPreIdx) or
// post-indexed instruction, and erases both I and Update.
//
// When IsForward is set (asserted to imply IsPreIdx) and the instruction after
// Update is a CFI accepted by maybeMoveCFI, the merged instruction is either
// inserted at Update's position (MergeEither) or the CFI is spliced to just
// after the insertion point; if that splice would move it across another
// CFI_INSTRUCTION, nothing is changed and std::nullopt is returned.
//
// Returns NextI (advanced past Update if it pointed at it) on success.
//
// NOTE(review): the listing numbering skips 2307, 2313, 2355, 2361 and 2372,
// so part of the parameter list (I and Update are used but not declared in
// the visible lines), the declaration of NextI, the condition choosing the
// non-paired branch, and one operand of each BuildMI chain are not visible in
// this extract.
2306std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2308 bool IsForward, bool IsPreIdx, bool MergeEither) {
2309 assert((Update->getOpcode() == AArch64::ADDXri ||
2310 Update->getOpcode() == AArch64::SUBXri) &&
2311 "Unexpected base register update instruction to merge!");
2312 MachineBasicBlock::iterator E = I->getParent()->end();
2314
2315 // If updating the SP and the following instruction is CFA offset related CFI,
2316 // make sure the CFI follows the SP update either by merging at the location
2317 // of the update or by moving the CFI after the merged instruction. If unable
2318 // to do so, bail.
2319 MachineBasicBlock::iterator InsertPt = I;
2320 if (IsForward) {
2321 assert(IsPreIdx);
2322 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2323 if (MergeEither) {
2324 InsertPt = Update;
2325 } else {
2326 // Take care not to reorder CFIs.
2327 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2328 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2329 }))
2330 return std::nullopt;
2331
2332 MachineBasicBlock *MBB = InsertPt->getParent();
2333 MBB->splice(std::next(InsertPt), MBB, CFI);
2334 }
2335 }
2336 }
2337
2338 // Return the instruction following the merged instruction, which is
2339 // the instruction following our unmerged load. Unless that's the add/sub
2340 // instruction we're merging, in which case it's the one after that.
2341 if (NextI == Update)
2342 NextI = next_nodbg(NextI, E);
2343
 // Signed byte offset applied by Update: the immediate, negated for SUBXri.
2344 int Value = Update->getOperand(2).getImm();
2345 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2346 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2347 if (Update->getOpcode() == AArch64::SUBXri)
2348 Value = -Value;
2349
2350 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2351 : getPostIndexedOpcode(I->getOpcode());
2352 MachineInstrBuilder MIB;
 // Only Scale is used below; the immediate of the new instruction is encoded
 // as Value / Scale.
2353 int Scale, MinOffset, MaxOffset;
2354 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2356 // Non-paired instruction.
2357 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2358 TII->get(NewOpc))
2359 .add(Update->getOperand(0))
2360 .add(getLdStRegOp(*I))
2362 .addImm(Value / Scale)
2363 .setMemRefs(I->memoperands())
2364 .setMIFlags(I->mergeFlagsWith(*Update));
2365 } else {
2366 // Paired instruction.
2367 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2368 TII->get(NewOpc))
2369 .add(Update->getOperand(0))
2370 .add(getLdStRegOp(*I, 0))
2371 .add(getLdStRegOp(*I, 1))
2373 .addImm(Value / Scale)
2374 .setMemRefs(I->memoperands())
2375 .setMIFlags(I->mergeFlagsWith(*Update));
2376 }
2377
2378 if (IsPreIdx) {
2379 ++NumPreFolded;
2380 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2381 } else {
2382 ++NumPostFolded;
2383 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2384 }
2385 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2386 LLVM_DEBUG(I->print(dbgs()));
2387 LLVM_DEBUG(dbgs() << " ");
2388 LLVM_DEBUG(Update->print(dbgs()));
2389 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2390 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2391 LLVM_DEBUG(dbgs() << "\n");
2392
2393 // Erase the old instructions for the block.
2394 I->eraseFromParent();
2395 Update->eraseFromParent();
2396
2397 return NextI;
2398}
2399
// Replaces a constant-offset load/store I, the MOVKWi instruction Update and
// the non-debug instruction preceding Update (PrevI) with two instructions
// inserted before I: an ADDXri that adds the high part of Offset (shifted by
// 12) to BaseReg into IndexReg, and a base-address form of the load/store
// that uses the low part of Offset, divided by Scale, as its immediate.
// The three original instructions are erased and NextI is returned.
//
// NOTE(review): the listing numbering skips 2400, 2402, 2407, 2413-2414 and
// 2429, so the return type, the Update parameter, and the declarations of
// NextI, IndexReg and BaseReg, as well as one operand of the memory
// instruction, are not visible in this extract.
2401AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2403 unsigned Offset, int Scale) {
2404 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2405 "Unexpected const mov instruction to merge!");
2406 MachineBasicBlock::iterator E = I->getParent()->end();
2408 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2409 MachineInstr &MemMI = *I;
 // Split Offset: Low keeps the bits below (1 << 12) * Scale, High is the
 // remainder and is emitted through the shifted ADDXri immediate.
2410 unsigned Mask = (1 << 12) * Scale - 1;
2411 unsigned Low = Offset & Mask;
2412 unsigned High = Offset - Low;
2415 MachineInstrBuilder AddMIB, MemMIB;
2416
2417 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2418 AddMIB =
2419 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2420 .addDef(IndexReg)
2421 .addUse(BaseReg)
2422 .addImm(High >> 12) // shifted value
2423 .addImm(12); // shift 12
 // AddMIB is otherwise only referenced inside LLVM_DEBUG; presumably this
 // cast avoids an unused-variable warning when those are compiled out.
2424 (void)AddMIB;
2425 // Ld/St DestReg, IndexReg, Imm12
2426 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2427 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2428 .add(getLdStRegOp(MemMI))
2430 .addImm(Low / Scale)
2431 .setMemRefs(I->memoperands())
2432 .setMIFlags(I->mergeFlagsWith(*Update));
2433 (void)MemMIB;
2434
2435 ++NumConstOffsetFolded;
2436 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2437 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2438 LLVM_DEBUG(PrevI->print(dbgs()));
2439 LLVM_DEBUG(dbgs() << " ");
2440 LLVM_DEBUG(Update->print(dbgs()));
2441 LLVM_DEBUG(dbgs() << " ");
2442 LLVM_DEBUG(I->print(dbgs()));
2443 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2444 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2445 LLVM_DEBUG(dbgs() << " ");
2446 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2447 LLVM_DEBUG(dbgs() << "\n");
2448
2449 // Erase the old instructions for the block.
2450 I->eraseFromParent();
2451 PrevI->eraseFromParent();
2452 Update->eraseFromParent();
2453
2454 return NextI;
2455}
2456
2457bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2458 MachineInstr &MI,
2459 unsigned BaseReg, int Offset) {
2460 switch (MI.getOpcode()) {
2461 default:
2462 break;
2463 case AArch64::SUBXri:
2464 case AArch64::ADDXri:
2465 // Make sure it's a vanilla immediate operand, not a relocation or
2466 // anything else we can't handle.
2467 if (!MI.getOperand(2).isImm())
2468 break;
2469 // Watch out for 1 << 12 shifted value.
2470 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2471 break;
2472
2473 // The update instruction source and destination register must be the
2474 // same as the load/store base register.
2475 if (MI.getOperand(0).getReg() != BaseReg ||
2476 MI.getOperand(1).getReg() != BaseReg)
2477 break;
2478
2479 int UpdateOffset = MI.getOperand(2).getImm();
2480 if (MI.getOpcode() == AArch64::SUBXri)
2481 UpdateOffset = -UpdateOffset;
2482
2483 // The immediate must be a multiple of the scaling factor of the pre/post
2484 // indexed instruction.
2485 int Scale, MinOffset, MaxOffset;
2486 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2487 if (UpdateOffset % Scale != 0)
2488 break;
2489
2490 // Scaled offset must fit in the instruction immediate.
2491 int ScaledOffset = UpdateOffset / Scale;
2492 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2493 break;
2494
2495 // If we have a non-zero Offset, we check that it matches the amount
2496 // we're adding to the register.
2497 if (!Offset || Offset == UpdateOffset)
2498 return true;
2499 break;
2500 }
2501 return false;
2502}
2503
// Returns true if MI is a MOVKWi whose operand 1 is IndexReg (or a
// sub-/super-register of it, per isSuperOrSubRegisterEq) and the previous
// non-debug instruction is a MOVZWi defining the same register as MI. In that
// case \p Offset is set to the constant formed by the two instructions, and
// the result is true only if that constant fits in 24 bits. \p Offset may be
// written even when false is returned for the 24-bit check. MemMI is not read
// in the visible lines.
//
// NOTE(review): the listing numbering skips 2515, so the declaration of MBBI
// is not visible in this extract.
2504bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2505 MachineInstr &MI,
2506 unsigned IndexReg,
2507 unsigned &Offset) {
2508 // The update instruction source and destination register must be the
2509 // same as the load/store index register.
2510 if (MI.getOpcode() == AArch64::MOVKWi &&
2511 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2512
2513 // movz + movk hold a large offset of a Ld/St instruction.
2514 MachineBasicBlock::iterator B = MI.getParent()->begin();
2516 // Bail out when MI is the first instruction of the block: there is no
 // preceding instruction that could be the MOVZWi.
2517 if (MBBI == B)
2518 return false;
2519 MBBI = prev_nodbg(MBBI, B);
2520 MachineInstr &MovzMI = *MBBI;
2521 // Make sure the MOVKWi and MOVZWi set the same register.
2522 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2523 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
 // Low comes from the MOVZWi immediate; High is the MOVKWi immediate
 // shifted left by its shift operand.
2524 unsigned Low = MovzMI.getOperand(1).getImm();
2525 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2526 Offset = High + Low;
2527 // 12-bit optionally shifted immediates are legal for adds.
2528 return Offset >> 24 == 0;
2529 }
2530 }
2531 return false;
2532}
2533
// Scans forward from the load/store I for a base-register update instruction
// (as recognised by isMatchingUpdateInsn with UnscaledOffset) that could be
// folded into I.
//
// Returns an iterator to the update instruction, or E (the end iterator of
// I's own block) when: the memory instruction's byte offset differs from
// UnscaledOffset; a transfer register overlaps the base register (except for
// tag stores and STGPi); the base is SP and the function needs Windows CFI;
// the base register is used or modified before a match; the base is SP and a
// memory access intervenes; or no match is found within Limit non-transient
// instructions. When liveness is tracked the scan may continue into a
// successor block, so a returned match is not necessarily in I's block.
//
// NOTE(review): the listing numbering skips 2538 and 2540, so the
// declarations of MBBI and BaseReg are not visible in this extract.
2534MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2535 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2536 MachineBasicBlock::iterator E = I->getParent()->end();
2537 MachineInstr &MemMI = *I;
2539
 // Byte offset of the memory access: immediate operand times the memory scale.
2541 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2542 TII->getMemScale(MemMI);
2543
2544 // Scan forward looking for post-index opportunities. Updating instructions
2545 // can't be formed if the memory instruction doesn't have the offset we're
2546 // looking for.
2547 if (MIUnscaledOffset != UnscaledOffset)
2548 return E;
2549
2550 // If the base register overlaps a source/destination register, we can't
2551 // merge the update. This does not apply to tag store instructions which
2552 // ignore the address part of the source register.
2553 // This does not apply to STGPi as well, which does not have unpredictable
2554 // behavior in this case unlike normal stores, and always performs writeback
2555 // after reading the source register value.
2556 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2557 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2558 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2559 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2560 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2561 return E;
2562 }
2563 }
2564
2565 // Track which register units have been modified and used between the first
2566 // insn (inclusive) and the second insn.
2567 ModifiedRegUnits.clear();
2568 UsedRegUnits.clear();
2569 MBBI = next_nodbg(MBBI, E);
2570
2571 // We can't post-increment the stack pointer if any instruction between
2572 // the memory access (I) and the increment (MBBI) can access the memory
2573 // region defined by [SP, MBBI].
2574 const bool BaseRegSP = BaseReg == AArch64::SP;
2575 if (BaseRegSP && needsWinCFI(I->getMF())) {
2576 // FIXME: For now, we always block the optimization over SP in windows
2577 // targets as it requires to adjust the unwind/debug info, messing up
2578 // the unwind info can actually cause a miscompile.
2579 return E;
2580 }
2581
2582 unsigned Count = 0;
2583 MachineBasicBlock *CurMBB = I->getParent();
2584 // The choice of the next block to visit is live-in based, so successors are
 // only followed when the function tracks liveness.
2585 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2586
2587 while (true) {
2588 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2589 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2590 MachineInstr &MI = *MBBI;
2591
2592 // Don't count transient instructions towards the search limit since there
2593 // may be different numbers of them if e.g. debug information is present.
2594 if (!MI.isTransient())
2595 ++Count;
2596
2597 // If we found a match, return it.
2598 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2599 return MBBI;
2600
2601 // Update the status of what the instruction clobbered and used.
2602 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2603 TRI);
2604
2605 // Otherwise, if the base register is used or modified, we have no match,
2606 // so return early. If we are optimizing SP, do not allow instructions
2607 // that may load or store in between the load and the optimized value
2608 // update.
2609 if (!ModifiedRegUnits.available(BaseReg) ||
2610 !UsedRegUnits.available(BaseReg) ||
2611 (BaseRegSP && MBBI->mayLoadOrStore()))
2612 return E;
2613 }
2614
2615 if (!VisitSucc || Limit <= Count)
2616 break;
2617
2618 // Try to continue downward into a successor along a control-flow path
2619 // without side entries, such that BaseReg is live along it but not at its
 // other exits: BaseReg (or an alias) must be live-in to exactly one
 // successor, and that successor must have a single predecessor.
2620 MachineBasicBlock *SuccToVisit = nullptr;
2621 unsigned LiveSuccCount = 0;
2622 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2623 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2624 if (Succ->isLiveIn(*AI)) {
 // A second successor with BaseReg live-in means the update cannot
 // be attributed to a single path: give up.
2625 if (LiveSuccCount++)
2626 return E;
2627 if (Succ->pred_size() == 1)
2628 SuccToVisit = Succ;
2629 break;
2630 }
2631 }
2632 }
2633 if (!SuccToVisit)
2634 break;
2635 CurMBB = SuccToVisit;
2636 MBBI = CurMBB->begin();
2637 }
2638
2639 return E;
2640}
2641
// Scans backward from the load/store I, within its basic block, for a
// base-register update instruction (as recognised by isMatchingUpdateInsn)
// that could be folded into I as a pre-index update.
//
// \p MergeEither is an output: it is set to true before the scan and cleared
// once an instruction is seen that accesses memory, has unmodeled side
// effects, or uses/modifies one of I's transfer registers, meaning the merged
// instruction can then only be placed at the memory instruction.
//
// Returns an iterator to the update instruction, or E (the block's end
// iterator) when I is the first instruction, its offset is non-zero, a
// transfer register overlaps the base register (except for tag stores), the
// base is SP and the function needs Windows CFI, the base register is used or
// modified before a match, the match would violate the red-zone limit, or
// nothing is found within Limit non-transient instructions.
//
// NOTE(review): the listing numbering skips 2647 and 2650-2651, so the
// declarations of MBBI, BaseReg and Offset are not visible in this extract.
2642MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2643 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2644 MachineBasicBlock::iterator B = I->getParent()->begin();
2645 MachineBasicBlock::iterator E = I->getParent()->end();
2646 MachineInstr &MemMI = *I;
2648 MachineFunction &MF = *MemMI.getMF();
2649
2652
 // Transfer register(s) of the memory instruction; the second entry is
 // NoRegister for non-paired instructions.
2653 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2654 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2655 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2656 : AArch64::NoRegister};
2657
2658 // If the load/store is the first instruction in the block, there's obviously
2659 // not any matching update. Ditto if the memory offset isn't zero.
2660 if (MBBI == B || Offset != 0)
2661 return E;
2662 // If the base register overlaps a destination register, we can't
2663 // merge the update.
2664 if (!isTagStore(MemMI)) {
2665 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2666 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2667 return E;
2668 }
2669
2670 const bool BaseRegSP = BaseReg == AArch64::SP;
2671 if (BaseRegSP && needsWinCFI(I->getMF())) {
2672 // FIXME: For now, we always block the optimization over SP in windows
2673 // targets as it requires to adjust the unwind/debug info, messing up
2674 // the unwind info can actually cause a miscompile.
2675 return E;
2676 }
2677
2678 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2679 unsigned RedZoneSize =
2680 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2681
2682 // Track which register units have been modified and used between the first
2683 // insn (inclusive) and the second insn.
2684 ModifiedRegUnits.clear();
2685 UsedRegUnits.clear();
2686 unsigned Count = 0;
2687 bool MemAccessBeforeSPPreInc = false;
2688 MergeEither = true;
 // The MBBI == B case was rejected above, so stepping back once per iteration
 // is valid on entry; the loop condition guards the following steps.
2689 do {
2690 MBBI = prev_nodbg(MBBI, B);
2691 MachineInstr &MI = *MBBI;
2692
2693 // Don't count transient instructions towards the search limit since there
2694 // may be different numbers of them if e.g. debug information is present.
2695 if (!MI.isTransient())
2696 ++Count;
2697
2698 // If we found a match, return it.
2699 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2700 // Check that the update value is within our red zone limit (which may be
2701 // zero).
2702 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2703 return E;
2704 return MBBI;
2705 }
2706
2707 // Update the status of what the instruction clobbered and used.
2708 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2709
2710 // Otherwise, if the base register is used or modified, we have no match, so
2711 // return early.
2712 if (!ModifiedRegUnits.available(BaseReg) ||
2713 !UsedRegUnits.available(BaseReg))
2714 return E;
2715
2716 // If we have a destination register (i.e. a load instruction) and a
2717 // destination register is used or modified, then we can only merge forward,
2718 // i.e. the combined instruction is put in the place of the memory
2719 // instruction. Same applies if we see a memory access or side effects.
2720 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2721 (DestReg[0] != AArch64::NoRegister &&
2722 !(ModifiedRegUnits.available(DestReg[0]) &&
2723 UsedRegUnits.available(DestReg[0]))) ||
2724 (DestReg[1] != AArch64::NoRegister &&
2725 !(ModifiedRegUnits.available(DestReg[1]) &&
2726 UsedRegUnits.available(DestReg[1]))))
2727 MergeEither = false;
2728
2729 // Keep track if we have a memory access before an SP pre-increment, in this
2730 // case we need to validate later that the update amount respects the red
2731 // zone.
2732 if (BaseRegSP && MBBI->mayLoadOrStore())
2733 MemAccessBeforeSPPreInc = true;
2734 } while (MBBI != B && Count < Limit);
2735 return E;
2736}
2737
/// Scan backward from the memory instruction \p I for an instruction that
/// isMatchingMovConstInsn accepts for the index register of \p I.
///
/// \param I      The memory instruction the search starts from.
/// \param Limit  The scan stops once this many non-transient instructions have
///               been counted.
/// \param Offset Passed by reference to isMatchingMovConstInsn.
/// \returns An iterator to the matching instruction, or the block's end()
///          iterator when no match is found: \p I is first in the block, the
///          guard on the offset/amount operands fails, the index register is
///          used or modified by an intervening instruction, or the scan runs
///          out of instructions or budget.
///
/// NOTE(review): MBBI and IndexReg are used below, but their declarations are
/// not visible in this listing.
2739AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2740 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2741 MachineBasicBlock::iterator B = I->getParent()->begin();
2742 MachineBasicBlock::iterator E = I->getParent()->end();
2743 MachineInstr &MemMI = *I;
2745
2746 // If the load is the first instruction in the block, there's obviously
2747 // not any matching load or store.
2748 if (MBBI == B)
2749 return E;
2750
2751 // Make sure the IndexReg is killed and the shift amount is zero.
2752 // TODO: Relax this restriction to extend, simplify processing now.
2753 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2754 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2756 return E;
2757
2759
2760 // Track which register units have been modified and used between the first
2761 // insn (inclusive) and the second insn.
2762 ModifiedRegUnits.clear();
2763 UsedRegUnits.clear();
2764 unsigned Count = 0;
2765 do {
2766 MBBI = prev_nodbg(MBBI, B);
2767 MachineInstr &MI = *MBBI;
2768
2769 // Don't count transient instructions towards the search limit since there
2770 // may be different numbers of them if e.g. debug information is present.
2771 if (!MI.isTransient())
2772 ++Count;
2773
2774 // If we found a match, return it.
2775 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2776 return MBBI;
2777 }
2778
2779 // Update the status of what the instruction clobbered and used.
2780 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2781
2782 // Otherwise, if the index register is used or modified, we have no match,
2783 // so return early.
2784 if (!ModifiedRegUnits.available(IndexReg) ||
2785 !UsedRegUnits.available(IndexReg))
2786 return E;
2787
// Keep walking until the start of the block is reached or the budget of
// non-transient instructions is used up.
2788 } while (MBBI != B && Count < Limit);
2789 return E;
2790}
2791
/// Try to promote the load at \p MBBI using a store located by
/// findMatchingStore.
///
/// On success the load is handed to promoteLoadFromStore, \p MBBI is replaced
/// by the iterator that routine returns, NumLoadsFromStoresPromoted is
/// incremented, and true is returned. Returns false otherwise.
2792bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2794 MachineInstr &MI = *MBBI;
2795 // If this is a volatile load, don't mess with it.
2796 if (MI.hasOrderedMemoryRef())
2797 return false;
2798
// Leave instructions flagged FrameDestroy alone when the function needs
// Windows CFI.
2799 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2800 return false;
2801
2802 // Make sure this is a reg+imm.
2803 // FIXME: It is possible to extend it to handle reg+reg cases.
2805 return false;
2806
2807 // Look backward up to LdStLimit instructions.
2809 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2810 ++NumLoadsFromStoresPromoted;
2811 // Promote the load. Keeping the iterator straight is a
2812 // pain, so we let the merge routine tell us what the next instruction
2813 // is after it's done mucking about.
2814 MBBI = promoteLoadFromStore(MBBI, StoreI);
2815 return true;
2816 }
2817 return false;
2818}
2819
/// Try to merge the narrow zero store at \p MBBI with a later store found by
/// findMatchingInsn (called with FindNarrowMerge = true).
///
/// The caller must pass an instruction for which isPromotableZeroStoreInst
/// holds; this is asserted. On success NumZeroStoresPromoted is incremented,
/// \p MBBI is replaced by the iterator returned from mergeNarrowZeroStores,
/// and true is returned. Returns false when the instruction is not a
/// merge/pair candidate or no partner is found.
2820// Merge adjacent zero stores into a wider store.
2821bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2823 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2824 MachineInstr &MI = *MBBI;
2825 MachineBasicBlock::iterator E = MI.getParent()->end();
2826
2827 if (!TII->isCandidateToMergeOrPair(MI))
2828 return false;
2829
2830 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2831 LdStPairFlags Flags;
2833 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
// The block's end() iterator is the "no partner found" sentinel.
2834 if (MergeMI != E) {
2835 ++NumZeroStoresPromoted;
2836
2837 // Keeping the iterator straight is a pain, so we let the merge routine tell
2838 // us what the next instruction is after it's done mucking about.
2839 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2840 return true;
2841 }
2842 return false;
2843}
2844
/// Try to combine the load/store at \p MBBI with a later instruction found by
/// findMatchingInsn into a single paired instruction.
///
/// Returns false without pairing when the instruction is not a merge/pair
/// candidate, when the subtarget disables ldp/stp for this kind of access,
/// when the offset cannot be in range for a pair, when no partner is found, or
/// when the ldp/stp-aligned-only alignment check fails. On success the pair is
/// created through mergePairedInsns, \p MBBI is replaced by the iterator it
/// returns, DefinedInBB is updated for the instructions up to the new
/// position, and true is returned.
2845// Find loads and stores that can be merged into a single load or store pair
2846// instruction.
2847bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2848 MachineInstr &MI = *MBBI;
2849 MachineBasicBlock::iterator E = MI.getParent()->end();
2850
2851 if (!TII->isCandidateToMergeOrPair(MI))
2852 return false;
2853
2854 // If disable-ldp feature is opted, do not emit ldp.
2855 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2856 return false;
2857
2858 // If disable-stp feature is opted, do not emit stp.
2859 if (MI.mayStore() && Subtarget->hasDisableStp())
2860 return false;
2861
2862 // Early exit if the offset is not possible to match. (6 bits of positive
2863 // range, plus allow an extra one in case we find a later insn that matches
2864 // with Offset-1)
2865 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
// Unscaled forms step by the memory scale; scaled forms step by one unit.
2867 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2868 // Allow one more for offset.
2869 if (Offset > 0)
2870 Offset -= OffsetStride;
2871 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2872 return false;
2873
2874 // Look ahead up to LdStLimit instructions for a pairable instruction.
2875 LdStPairFlags Flags;
2877 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2878
2879 if (Paired == E)
2880 return false;
2881
2882 // Remember the instruction preceding MBBI: the merge below replaces MBBI, and
2883 // the instructions from here up to the new MBBI are rescanned afterwards.
2884 auto Prev = std::prev(MBBI);
2885
2886 // Fetch the memoperand of the load/store that is a candidate for combination.
2887 MachineMemOperand *MemOp =
2888 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2889
2890 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2891 // that the alignment of the source pointer is at least double the alignment
2892 // of the type.
2893 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2894 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2895 // If there is no size/align information, cancel the transformation.
2896 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2897 NumFailedAlignmentCheck++;
2898 return false;
2899 }
2900
2901 // Get the needed alignments to check them if
2902 // ldp-aligned-only/stp-aligned-only features are opted.
2903 uint64_t MemAlignment = MemOp->getAlign().value();
2904 uint64_t TypeAlignment =
2905 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2906
2907 if (MemAlignment < 2 * TypeAlignment) {
2908 NumFailedAlignmentCheck++;
2909 return false;
2910 }
2911 }
2912
2913 ++NumPairCreated;
2914 if (TII->hasUnscaledLdStOffset(MI))
2915 ++NumUnscaledPairCreated;
2916
2917 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2918 // Collect liveness info for instructions between Prev and the new position
2919 // MBBI.
2920 for (auto I = std::next(Prev); I != MBBI; I++)
2921 updateDefinedRegisters(*I, DefinedInBB, TRI);
2922
2923 return true;
2924}
2925
/// Try to fold a base-register update into the load/store at \p MBBI so that
/// it becomes a pre- or post-indexed access.
///
/// Three searches are attempted in order:
///   1. forward, with offset 0, for a post-index update;
///   2. backward, for a pre-index update (skipped, together with step 3, for
///      opcodes with an unscaled offset);
///   3. forward, with the instruction's own unscaled offset, for a pre-index
///      update.
/// Whenever a search finds an update and mergeUpdateInsn yields an iterator,
/// \p MBBI is set to that iterator and true is returned. Returns false if no
/// merge took place.
2926bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2928 MachineInstr &MI = *MBBI;
2929 MachineBasicBlock::iterator E = MI.getParent()->end();
2931
2932 // Do not form post-inc addressing mode for volatile accesses. Instructions
2933 // performing register writeback do not set a valid instruction syndrome,
2934 // making it impossible to handle MMIO in protected hypervisors.
2935 // Exclude accesses based on the stack pointer, as these can't be MMIO.
2936 // Also exclude MTE tag store instructions.
2937 if (MBBI->hasOrderedMemoryRef() &&
2938 AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP &&
2939 !isTagStore(MI) && MI.getOpcode() != AArch64::STGPi)
2940 return false;
2941
2942 // Look forward to try to form a post-index instruction. For example,
2943 // ldr x0, [x20]
2944 // add x20, x20, #32
2945 // merged into:
2946 // ldr x0, [x20], #32
2947 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2948 if (Update != E) {
2949 // Merge the update into the ld/st.
2950 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2951 /*IsPreIdx=*/false,
2952 /*MergeEither=*/false)) {
2953 MBBI = *NextI;
2954 return true;
2955 }
2956 }
2957
2958 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2959 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2960 return false;
2961
2962 // Look back to try to find a pre-index instruction. For example,
2963 // add x0, x0, #8
2964 // ldr x1, [x0]
2965 // merged into:
2966 // ldr x1, [x0, #8]!
// MergeEither is filled in by the backward search and forwarded to the merge.
2967 bool MergeEither;
2968 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2969 if (Update != E) {
2970 // Merge the update into the ld/st.
2971 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2972 /*IsPreIdx=*/true, MergeEither)) {
2973 MBBI = *NextI;
2974 return true;
2975 }
2976 }
2977
2978 // The immediate in the load/store is scaled by the size of the memory
2979 // operation. The immediate in the add we're looking for,
2980 // however, is not, so adjust here.
2981 int UnscaledOffset =
2983
2984 // Look forward to try to find a pre-index instruction. For example,
2985 // ldr x1, [x0, #64]
2986 // add x0, x0, #64
2987 // merged into:
2988 // ldr x1, [x0, #64]!
2989 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2990 if (Update != E) {
2991 // Merge the update into the ld/st.
2992 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2993 /*IsPreIdx=*/true,
2994 /*MergeEither=*/false)) {
2995 MBBI = *NextI;
2996 return true;
2997 }
2998 }
2999
3000 return false;
3001}
3002
/// Try to fold a constant found by findMatchingConstOffsetBackward into the
/// register-indexed load/store at \p MBBI.
///
/// \param MBBI  The indexed load/store; replaced by the iterator returned from
///              mergeConstOffsetInsn on success.
/// \param Scale Scale of the memory access; the constant found must be a
///              multiple of it.
/// \returns true if the instruction was rewritten, false otherwise (including
///          for opcodes with an unscaled offset, which are not handled).
3003bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
3004 int Scale) {
3005 MachineInstr &MI = *MBBI;
3006 MachineBasicBlock::iterator E = MI.getParent()->end();
3008
3009 // Don't know how to handle unscaled pre/post-index versions below, so bail.
3010 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
3011 return false;
3012
3013 // Look back to try to find a const offset for index LdSt instruction. For
3014 // example,
3015 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3016 // ldr x1, [x0, x8]
3017 // merged into:
3018 // add x8, x0, a * (1<<12)
3019 // ldr x1, [x8, imm12]
3020 unsigned Offset;
3021 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
// Only merge when Offset is a multiple of Scale. The mask test assumes Scale
// is a power of two -- TODO confirm against isMergeableIndexLdSt.
3022 if (Update != E && (Offset & (Scale - 1)) == 0) {
3023 // Merge the imm12 into the ld/st.
3024 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
3025 return true;
3026 }
3027
3028 return false;
3029}
3030
3031// Map a GPR store opcode to its FPR equivalent at the same data width.
3032// Returns 0 if no mapping exists.
3033static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc) {
3034 switch (GPRStoreOpc) {
3035 // Unsigned immediate.
3036 case AArch64::STRBBui:
3037 return AArch64::STRBui;
3038 case AArch64::STRHHui:
3039 return AArch64::STRHui;
3040 case AArch64::STRWui:
3041 return AArch64::STRSui;
3042 case AArch64::STRXui:
3043 return AArch64::STRDui;
3044 // Unscaled immediate.
3045 case AArch64::STURBBi:
3046 return AArch64::STURBi;
3047 case AArch64::STURHHi:
3048 return AArch64::STURHi;
3049 case AArch64::STURWi:
3050 return AArch64::STURSi;
3051 case AArch64::STURXi:
3052 return AArch64::STURDi;
3053 // Register offset.
3054 case AArch64::STRBBroW:
3055 return AArch64::STRBroW;
3056 case AArch64::STRBBroX:
3057 return AArch64::STRBroX;
3058 case AArch64::STRHHroW:
3059 return AArch64::STRHroW;
3060 case AArch64::STRHHroX:
3061 return AArch64::STRHroX;
3062 case AArch64::STRWroW:
3063 return AArch64::STRSroW;
3064 case AArch64::STRWroX:
3065 return AArch64::STRSroX;
3066 case AArch64::STRXroW:
3067 return AArch64::STRDroW;
3068 case AArch64::STRXroX:
3069 return AArch64::STRDroX;
3070 default:
3071 return 0;
3072 }
3073}
3074
3075// Given a UMOV-lane-0 opcode, return the sub-register index to extract from
3076// the vector register, or 0 if the opcode is not a supported UMOV.
3077static unsigned getUMOVSubRegIdx(unsigned UMOVOpc) {
3078 switch (UMOVOpc) {
3079 case AArch64::UMOVvi8_idx0:
3080 return AArch64::bsub;
3081 case AArch64::UMOVvi16_idx0:
3082 return AArch64::hsub;
3083 case AArch64::UMOVvi32_idx0:
3084 return AArch64::ssub;
3085 case AArch64::UMOVvi64_idx0:
3086 return AArch64::dsub;
3087 default:
3088 return 0;
3089 }
3090}
3091
/// Try to replace a "UMOV lane 0 -> GPR, then GPR store" sequence ending at
/// \p MBBI with a single store of the matching FPR sub-register.
///
/// The store must have an FPR equivalent (getGPRToFPRStoreOpcode), exactly one
/// memory operand, no ordered memory reference, and must kill its value
/// register. The value register must be defined by a supported UMOV within
/// UMOVFoldLimit non-debug instructions, with no read of it in between, and
/// the UMOV's vector source must not be modified between the UMOV and the
/// store. On success both original instructions are erased, \p MBBI is set to
/// the iterator returned by erasing the store, NumUMOVFoldedToFPRStore is
/// incremented, and true is returned.
///
/// NOTE(review): the iterator B used by the backward scan is declared on a
/// line that is not visible in this listing.
3092bool AArch64LoadStoreOpt::tryToReplaceUMOVStore(
3094 MachineInstr &StoreMI = *MBBI;
3095
// A zero opcode means this store has no FPR counterpart.
3096 unsigned FPRStoreOpc = getGPRToFPRStoreOpcode(StoreMI.getOpcode());
3097 if (!FPRStoreOpc)
3098 return false;
3099
3100 if (StoreMI.hasOrderedMemoryRef() || StoreMI.memoperands().size() != 1)
3101 return false;
3102
3103 MachineBasicBlock *MBB = StoreMI.getParent();
3104 MCPhysReg StoreValReg = StoreMI.getOperand(0).getReg();
3105
// The GPR value must die at the store.
3106 if (!StoreMI.getOperand(0).isKill())
3107 return false;
3108
3109 // Bail out if the store uses the value register elsewhere (e.g., as the base
3110 // address in `str w8, [x8, #0]`).
3111 for (unsigned I = 1, E = StoreMI.getNumExplicitOperands(); I < E; ++I)
3112 if (StoreMI.getOperand(I).isReg() &&
3113 TRI->regsOverlap(StoreMI.getOperand(I).getReg(), StoreValReg))
3114 return false;
3115
3116 // Scan backward to find the UMOV that defines the store's value register.
3117 MachineInstr *UMOVMI = nullptr;
3119 unsigned SubRegIdx = 0;
3120 unsigned Count = 0;
3121 for (auto It = MBBI; It != B;) {
3122 MachineInstr &MI = *--It;
// Debug instructions neither count towards the limit nor block the fold.
3123 if (MI.isDebugInstr())
3124 continue;
3125 if (++Count > UMOVFoldLimit)
3126 return false;
// Any other reader of the GPR value still needs it, so give up.
3127 if (MI.readsRegister(StoreValReg, TRI))
3128 return false;
// The nearest definition must be a supported lane-0 UMOV.
3129 if (MI.modifiesRegister(StoreValReg, TRI)) {
3130 SubRegIdx = getUMOVSubRegIdx(MI.getOpcode());
3131 if (!SubRegIdx)
3132 return false;
3133 UMOVMI = &MI;
3134 break;
3135 }
3136 }
3137 if (!UMOVMI)
3138 return false;
3139 MCPhysReg VecReg = UMOVMI->getOperand(1).getReg();
3140 MCPhysReg FPRReg = TRI->getSubReg(VecReg, SubRegIdx);
// The stored size must equal the size of the FPR sub-register being stored.
3141 if ((*StoreMI.memoperands_begin())->getSizeInBits() !=
3142 TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(FPRReg)))
3143 return false;
3144
3145 // Check that no instruction between UMOV and store clobbers the vector
3146 // register. Also track whether VecReg is killed anywhere from the UMOV
3147 // (inclusive) through the intervening instructions -- we need this to decide
3148 // whether the FPR sub-register can be marked killed on the new store.
3149 bool VecRegKilled = UMOVMI->killsRegister(VecReg, TRI);
3150 for (auto It = std::next(UMOVMI->getIterator()); It != MBBI; ++It) {
3151 if (It->modifiesRegister(VecReg, TRI))
3152 return false;
3153 if (!VecRegKilled && It->killsRegister(VecReg, TRI))
3154 VecRegKilled = true;
3155 }
3156
3157 // Safe to proceed. Clear kill flags on the vector register between UMOV and
3158 // the new store so the FPR sub-register stays live.
3159 UMOVMI->clearRegisterKills(VecReg, TRI);
3160 for (auto It = std::next(UMOVMI->getIterator()); It != MBBI; ++It)
3161 It->clearRegisterKills(VecReg, TRI);
3162
3163 LLVM_DEBUG(dbgs() << "Folding UMOV + store: " << *UMOVMI << " + "
3164 << StoreMI);
3165
// Build the FPR store in front of the old store, reusing all of its operands
// after the value register, and its memory operands.
3166 auto MIB = BuildMI(*MBB, MBBI, StoreMI.getDebugLoc(), TII->get(FPRStoreOpc))
3167 .addReg(FPRReg, getKillRegState(VecRegKilled));
3168 for (unsigned I = 1, E = StoreMI.getNumExplicitOperands(); I < E; ++I)
3169 MIB.add(StoreMI.getOperand(I));
3170 MIB.setMemRefs(StoreMI.memoperands());
3171
// Remove the old store first (this yields the next iterator), then the UMOV.
3172 MBBI = MBB->erase(MBBI);
3173 UMOVMI->eraseFromParent();
3174
3175 ++NumUMOVFoldedToFPRStore;
3176 return true;
3177}
3178
/// Run the load/store peephole transformations over \p MBB, one full sweep of
/// the block per transformation, in the order listed below.
///
/// In every sweep the iterator is advanced manually only when the attempted
/// transformation did not fire; a successful try* call updates the iterator
/// itself.
///
/// \param MBB                   The basic block to optimize.
/// \param EnableNarrowZeroStOpt Gates transformation 2 (zero-store merging).
/// \returns true if any transformation changed the block.
///
/// NOTE(review): the loop headers of the sweeps and the condition guarding the
/// DefinedInBB reset are on lines that are not visible in this listing.
3179bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
3180 bool EnableNarrowZeroStOpt) {
3181 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
3182
3183 bool Modified = false;
3184 // Six transformations to do here:
3185 // 1) Find loads that directly read from stores and promote them by
3186 // replacing with mov instructions. If the store is wider than the load,
3187 // the load will be replaced with a bitfield extract.
3188 // e.g.,
3189 // str w1, [x0, #4]
3190 // ldrh w2, [x0, #6]
3191 // ; becomes
3192 // str w1, [x0, #4]
3193 // lsr w2, w1, #16
3195 MBBI != E;) {
3196 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3197 Modified = true;
3198 else
3199 ++MBBI;
3200 }
3201 // 2) Merge adjacent zero stores into a wider store.
3202 // e.g.,
3203 // strh wzr, [x0]
3204 // strh wzr, [x0, #2]
3205 // ; becomes
3206 // str wzr, [x0]
3207 // e.g.,
3208 // str wzr, [x0]
3209 // str wzr, [x0, #4]
3210 // ; becomes
3211 // str xzr, [x0]
3212 if (EnableNarrowZeroStOpt)
3214 MBBI != E;) {
3215 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3216 Modified = true;
3217 else
3218 ++MBBI;
3219 }
3220 // 3) Find loads and stores that can be merged into a single load or store
3221 // pair instruction.
3222 // When compiling for SVE 128, also try to combine SVE fill/spill
3223 // instructions into LDP/STP.
3224 // e.g.,
3225 // ldr x0, [x2]
3226 // ldr x1, [x2, #8]
3227 // ; becomes
3228 // ldp x0, x1, [x2]
3229 // e.g.,
3230 // ldr z0, [x2]
3231 // ldr z1, [x2, #1, mul vl]
3232 // ; becomes
3233 // ldp q0, q1, [x2]
3234
// Start the defined-register tracking from the block's live-ins.
3236 DefinedInBB.clear();
3237 DefinedInBB.addLiveIns(MBB);
3238 }
3239
3241 MBBI != E;) {
3242 // Track currently live registers up to this point, to help with
3243 // searching for a rename register on demand.
3244 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3245 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3246 Modified = true;
3247 else
3248 ++MBBI;
3249 }
3250 // 4) Find base register updates that can be merged into the load or store
3251 // as a base-reg writeback.
3252 // e.g.,
3253 // ldr x0, [x2]
3254 // add x2, x2, #4
3255 // ; becomes
3256 // ldr x0, [x2], #4
3258 MBBI != E;) {
3259 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3260 Modified = true;
3261 else
3262 ++MBBI;
3263 }
3264
3265 // 5) Find a register assigned a constant value that can be folded into
3266 // the load or store. e.g.,
3267 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3268 // ldr x1, [x0, x8]
3269 // ; becomes
3270 // add x8, x0, a * (1<<12)
3271 // ldr x1, [x8, imm12]
3273 MBBI != E;) {
// Scale is an output of isMergeableIndexLdSt, consumed by the merge attempt.
3274 int Scale;
3275 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3276 Modified = true;
3277 else
3278 ++MBBI;
3279 }
3280
3281 // 6) Replace UMOV (lane 0) + GPR store with a direct FPR sub-register store.
3282 // e.g.,
3283 // umov w8, v0.h[0]
3284 // strh w8, [x0]
3285 // ; becomes
3286 // str h0, [x0]
3288 MBBI != E;) {
3289 if (tryToReplaceUMOVStore(MBBI))
3290 Modified = true;
3291 else
3292 ++MBBI;
3293 }
3294
3295 return Modified;
3296}
3297
3298bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3299 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3300 TII = Subtarget->getInstrInfo();
3301 TRI = Subtarget->getRegisterInfo();
3302
3303 // Resize the modified and used register unit trackers. We do this once
3304 // per function and then clear the register units each time we optimize a load
3305 // or store.
3306 ModifiedRegUnits.init(*TRI);
3307 UsedRegUnits.init(*TRI);
3308 DefinedInBB.init(*TRI);
3309
3310 bool Modified = false;
3311 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3312 for (auto &MBB : Fn) {
3313 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3314 Modified |= M;
3315 }
3316
3317 return Modified;
3318}
3319
3320// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3321// stores near one another? Note: The pre-RA instruction scheduler already has
3322// hooks to try and schedule pairable loads/stores together to improve pairing
3323// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3324
3325// FIXME: When pairing store instructions it's very possible for this pass to
3326// hoist a store with a KILL marker above another use (without a KILL marker).
3327// The resulting IR is invalid, but nothing uses the KILL markers after this
3328// pass, so it's never caused a problem in practice.
3329
3330bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3331 if (skipFunction(MF.getFunction()))
3332 return false;
3333 AArch64LoadStoreOpt Impl;
3334 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3335 return Impl.runOnMachineFunction(MF);
3336}
3337
3338/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3339/// load / store optimization pass.
3341 return new AArch64LoadStoreOptLegacy();
3342}
3343
3347 AArch64LoadStoreOpt Impl;
3349 .getManager()
3350 .getResult<AAManager>(MF.getFunction());
3351 bool Changed = Impl.runOnMachineFunction(MF);
3352 if (!Changed)
3353 return PreservedAnalyses::all();
3356 return PA;
3357}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static unsigned getUMOVSubRegIdx(unsigned UMOVOpc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > UMOVFoldLimit("aarch64-umov-fold-scan-limit", cl::init(16), cl::Hidden)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:663
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
OpType getOperation() const
Definition MCDwarf.h:804
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
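isRenamable - Returns true if this register may be renamed, i.e. it does not generate a value that is somehow read in a way that is not represented by the Machine IR (e.g. to meet an ABI or ISA requirement).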
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:573
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptLegacyPass - returns an instance of the load / store optimization pass.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MCRegisterClass TargetRegisterClass
Definition FastISel.h:58