// Extracted from LLVM (23.0.0git) doxygen page for
// llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp.
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions, one of which
// has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad-hoc fix
// to unwind information.
17//
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringRef.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCDwarf.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <cstdint>
47#include <functional>
48#include <iterator>
49#include <limits>
50#include <optional>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "aarch64-ldst-opt"
55
56STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
57STATISTIC(NumPostFolded, "Number of post-index updates folded");
58STATISTIC(NumPreFolded, "Number of pre-index updates folded");
59STATISTIC(NumUnscaledPairCreated,
60 "Number of load/store from unscaled generated");
61STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
62STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
63STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
64 "not passed the alignment check");
65STATISTIC(NumConstOffsetFolded,
66 "Number of const offset of index address folded");
67
68DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
69 "Controls which pairs are considered for renaming");
70
71// The LdStLimit limits how far we search for load/store pairs.
72static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
73 cl::init(20), cl::Hidden);
74
75// The UpdateLimit limits how far we search for update instructions when we form
76// pre-/post-index instructions.
77static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
79
80// The LdStConstLimit limits how far we search for const offset instructions
81// when we form index address load/store instructions.
82static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
83 cl::init(10), cl::Hidden);
84
85// Enable register renaming to find additional store pairing opportunities.
86static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
87 cl::init(true), cl::Hidden);
88
89#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
90
91namespace {
92
// Per-candidate flags computed while scanning for a mergeable load/store
// pair and consumed when the pair is actually formed (see mergePairedInsns).
using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  // Trivial accessors; the fields above document the semantics.
  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
122
123struct AArch64LoadStoreOpt : public MachineFunctionPass {
124 static char ID;
125
126 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
127
129 const AArch64InstrInfo *TII;
130 const TargetRegisterInfo *TRI;
131 const AArch64Subtarget *Subtarget;
132
133 // Track which register units have been modified and used.
134 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
135 LiveRegUnits DefinedInBB;
136
137 void getAnalysisUsage(AnalysisUsage &AU) const override {
140 }
141
142 // Scan the instructions looking for a load/store that can be combined
143 // with the current instruction into a load/store pair.
144 // Return the matching instruction if one is found, else MBB->end().
146 LdStPairFlags &Flags,
147 unsigned Limit,
148 bool FindNarrowMerge);
149
150 // Scan the instructions looking for a store that writes to the address from
151 // which the current load instruction reads. Return true if one is found.
152 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
154
155 // Merge the two instructions indicated into a wider narrow store instruction.
157 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
159 const LdStPairFlags &Flags);
160
161 // Merge the two instructions indicated into a single pair-wise instruction.
163 mergePairedInsns(MachineBasicBlock::iterator I,
165 const LdStPairFlags &Flags);
166
167 // Promote the load that reads directly from the address stored to.
169 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
171
172 // Scan the instruction list to find a base register update that can
173 // be combined with the current instruction (a load or store) using
174 // pre or post indexed addressing with writeback. Scan forwards.
176 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
177 int UnscaledOffset, unsigned Limit);
178
179 // Scan the instruction list to find a register assigned with a const
180 // value that can be combined with the current instruction (a load or store)
181 // using base addressing with writeback. Scan backwards.
183 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
184 unsigned &Offset);
185
186 // Scan the instruction list to find a base register update that can
187 // be combined with the current instruction (a load or store) using
188 // pre or post indexed addressing with writeback. Scan backwards.
189 // `MergeEither` is set to true if the combined instruction may be placed
190 // either at the location of the load/store instruction or at the location of
191 // the update instruction.
193 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
194 bool &MergeEither);
195
196 // Find an instruction that updates the base register of the ld/st
197 // instruction.
198 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
199 unsigned BaseReg, int Offset);
200
201 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
202 unsigned IndexReg, unsigned &Offset);
203
204 // Merge a pre- or post-index base register update into a ld/st instruction.
205 std::optional<MachineBasicBlock::iterator>
206 mergeUpdateInsn(MachineBasicBlock::iterator I,
207 MachineBasicBlock::iterator Update, bool IsForward,
208 bool IsPreIdx, bool MergeEither);
209
211 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
212 MachineBasicBlock::iterator Update, unsigned Offset,
213 int Scale);
214
215 // Find and merge zero store instructions.
216 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
217
218 // Find and pair ldr/str instructions.
219 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
220
221 // Find and promote load instructions which read directly from store.
222 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
223
224 // Find and merge a base register updates before or after a ld/st instruction.
225 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
226
227 // Find and merge an index ldr/st instruction into a base ld/st instruction.
228 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
229
230 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
231
232 bool runOnMachineFunction(MachineFunction &Fn) override;
233
234 MachineFunctionProperties getRequiredProperties() const override {
235 return MachineFunctionProperties().setNoVRegs();
236 }
237
238 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
239};
240
241char AArch64LoadStoreOpt::ID = 0;
242
243} // end anonymous namespace
244
245INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
246 AARCH64_LOAD_STORE_OPT_NAME, false, false)
247
248static bool isNarrowStore(unsigned Opc) {
249 switch (Opc) {
250 default:
251 return false;
252 case AArch64::STRBBui:
253 case AArch64::STURBBi:
254 case AArch64::STRHHui:
255 case AArch64::STURHHi:
256 return true;
257 }
258}
259
260// These instruction set memory tag and either keep memory contents unchanged or
261// set it to zero, ignoring the address part of the source register.
262static bool isTagStore(const MachineInstr &MI) {
263 switch (MI.getOpcode()) {
264 default:
265 return false;
266 case AArch64::STGi:
267 case AArch64::STZGi:
268 case AArch64::ST2Gi:
269 case AArch64::STZ2Gi:
270 return true;
271 }
272}
273
// Map a sign-extending word load opcode (LDRSW*) to its non-extending
// counterpart (LDRW*); every other load/store opcode handled by this pass
// maps to itself. If IsValidLdStrOpc is non-null, it is set to true when Opc
// is one of the handled opcodes and to false otherwise, in which case
// unsigned-max is returned as a sentinel.
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  // Sign-extending loads map to the corresponding zero-extending forms.
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
328
// Map a store opcode to the store opcode of twice the access width, e.g. a
// byte store to a half-word store. Asserts if Opc has no wide equivalent.
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
347
// Map a single load/store opcode to the equivalent paired (LDP/STP) opcode.
// Scaled ("ui") and unscaled ("u") forms map to the same scaled pair opcode.
// The SVE fill/spill opcodes LDR_ZXI/STR_ZXI pair as Q-register LDP/STP.
// Asserts if Opc has no pairwise equivalent.
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
411
414 unsigned LdOpc = LoadInst.getOpcode();
415 unsigned StOpc = StoreInst.getOpcode();
416 switch (LdOpc) {
417 default:
418 llvm_unreachable("Unsupported load instruction!");
419 case AArch64::LDRBBui:
420 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
421 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
422 case AArch64::LDURBBi:
423 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
424 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
425 case AArch64::LDRHHui:
426 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
427 StOpc == AArch64::STRXui;
428 case AArch64::LDURHHi:
429 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
430 StOpc == AArch64::STURXi;
431 case AArch64::LDRWui:
432 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
433 case AArch64::LDURWi:
434 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
435 case AArch64::LDRXui:
436 return StOpc == AArch64::STRXui;
437 case AArch64::LDURXi:
438 return StOpc == AArch64::STURXi;
439 }
440}
441
// Map a load/store opcode (single, paired, and tag-store forms) to its
// pre-indexed (base-register writeback) equivalent. Asserts if there is none.
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRBui:
    return AArch64::STRBpre;
  case AArch64::STRHui:
    return AArch64::STRHpre;
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRBui:
    return AArch64::LDRBpre;
  case AArch64::LDRHui:
    return AArch64::LDRHpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
522
// Map a register-offset ("roX") load opcode to the corresponding
// unsigned-immediate-offset ("ui") form, used when a constant index can be
// folded into the base address. Asserts if Opc has no such equivalent.
static unsigned getBaseAddressOpcode(unsigned Opc) {
  // TODO: Add more index address stores.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
558
// Map a load/store opcode (single, unscaled, paired, and tag-store forms) to
// its post-indexed (base-register writeback) equivalent. Unlike the
// pre-indexed mapping above, the unscaled ("u") forms are also handled here.
// Asserts if there is none.
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRBui:
    return AArch64::STRBpost;
  case AArch64::STRHui:
    return AArch64::STRHpost;
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRBui:
    return AArch64::LDRBpost;
  case AArch64::LDRHui:
    return AArch64::LDRHpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
645
647
648 unsigned OpcA = FirstMI.getOpcode();
649 unsigned OpcB = MI.getOpcode();
650
651 switch (OpcA) {
652 default:
653 return false;
654 case AArch64::STRSpre:
655 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
656 case AArch64::STRDpre:
657 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
658 case AArch64::STRQpre:
659 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
660 case AArch64::STRWpre:
661 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
662 case AArch64::STRXpre:
663 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
664 case AArch64::LDRSpre:
665 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
666 case AArch64::LDRDpre:
667 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
668 case AArch64::LDRQpre:
669 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
670 case AArch64::LDRWpre:
671 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
672 case AArch64::LDRXpre:
673 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
674 case AArch64::LDRSWpre:
675 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
676 }
677}
678
679// Returns the scale and offset range of pre/post indexed variants of MI.
680static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
681 int &MinOffset, int &MaxOffset) {
682 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
683 bool IsTagStore = isTagStore(MI);
684 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
685 // as in the "unsigned offset" variant.
686 // All other pre/post indexed ldst instructions are unscaled.
687 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
688
689 if (IsPaired) {
690 MinOffset = -64;
691 MaxOffset = 63;
692 } else {
693 MinOffset = -256;
694 MaxOffset = 255;
695 }
696}
697
699 unsigned PairedRegOp = 0) {
700 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
701 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
702 if (IsPreLdSt)
703 PairedRegOp += 1;
704 unsigned Idx =
705 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
706 return MI.getOperand(Idx);
707}
708
711 const AArch64InstrInfo *TII) {
712 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
713 int LoadSize = TII->getMemScale(LoadInst);
714 int StoreSize = TII->getMemScale(StoreInst);
715 int UnscaledStOffset =
716 TII->hasUnscaledLdStOffset(StoreInst)
719 int UnscaledLdOffset =
720 TII->hasUnscaledLdStOffset(LoadInst)
723 return (UnscaledStOffset <= UnscaledLdOffset) &&
724 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
725}
726
728 unsigned Opc = MI.getOpcode();
729 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
730 isNarrowStore(Opc)) &&
731 getLdStRegOp(MI).getReg() == AArch64::WZR;
732}
733
735 switch (MI.getOpcode()) {
736 default:
737 return false;
738 // Scaled instructions.
739 case AArch64::LDRBBui:
740 case AArch64::LDRHHui:
741 case AArch64::LDRWui:
742 case AArch64::LDRXui:
743 // Unscaled instructions.
744 case AArch64::LDURBBi:
745 case AArch64::LDURHHi:
746 case AArch64::LDURWi:
747 case AArch64::LDURXi:
748 return true;
749 }
750}
751
753 unsigned Opc = MI.getOpcode();
754 switch (Opc) {
755 default:
756 return false;
757 // Scaled instructions.
758 case AArch64::STRBui:
759 case AArch64::STRHui:
760 case AArch64::STRSui:
761 case AArch64::STRDui:
762 case AArch64::STRQui:
763 case AArch64::STRXui:
764 case AArch64::STRWui:
765 case AArch64::STRHHui:
766 case AArch64::STRBBui:
767 case AArch64::LDRBui:
768 case AArch64::LDRHui:
769 case AArch64::LDRSui:
770 case AArch64::LDRDui:
771 case AArch64::LDRQui:
772 case AArch64::LDRXui:
773 case AArch64::LDRWui:
774 case AArch64::LDRHHui:
775 case AArch64::LDRBBui:
776 case AArch64::STGi:
777 case AArch64::STZGi:
778 case AArch64::ST2Gi:
779 case AArch64::STZ2Gi:
780 case AArch64::STGPi:
781 // Unscaled instructions.
782 case AArch64::STURSi:
783 case AArch64::STURDi:
784 case AArch64::STURQi:
785 case AArch64::STURWi:
786 case AArch64::STURXi:
787 case AArch64::LDURSi:
788 case AArch64::LDURDi:
789 case AArch64::LDURQi:
790 case AArch64::LDURWi:
791 case AArch64::LDURXi:
792 // Paired instructions.
793 case AArch64::LDPSi:
794 case AArch64::LDPSWi:
795 case AArch64::LDPDi:
796 case AArch64::LDPQi:
797 case AArch64::LDPWi:
798 case AArch64::LDPXi:
799 case AArch64::STPSi:
800 case AArch64::STPDi:
801 case AArch64::STPQi:
802 case AArch64::STPWi:
803 case AArch64::STPXi:
804 // Make sure this is a reg+imm (as opposed to an address reloc).
806 return false;
807
808 // When using stack tagging, simple sp+imm loads and stores are not
809 // tag-checked, but pre- and post-indexed versions of them are, so we can't
810 // replace the former with the latter. This transformation would be valid
811 // if the load/store accesses an untagged stack slot, but we don't have
812 // that information available after frame indices have been eliminated.
813 if (AFI.isMTETagged() &&
814 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
815 return false;
816
817 return true;
818 }
819}
820
// Make sure this is a reg+reg Ld/St
// Returns true for register-offset ("roX") loads that are candidates for the
// index-address folding (see getBaseAddressOpcode); Scale is set to the
// memory access size in bytes.
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  // TODO: Add more index address stores.
  // 1-byte accesses.
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  // 2-byte accesses.
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  // 4-byte accesses.
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  // 8-byte accesses.
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  // 16-byte accesses.
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}
855
857 switch (MO.getParent()->getOpcode()) {
858 default:
859 return MO.isRenamable();
860 case AArch64::ORRWrs:
861 case AArch64::ADDWri:
862 return true;
863 }
864}
865
867AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
869 const LdStPairFlags &Flags) {
871 "Expected promotable zero stores.");
872
873 MachineBasicBlock::iterator E = I->getParent()->end();
875 // If NextI is the second of the two instructions to be merged, we need
876 // to skip one further. Either way we merge will invalidate the iterator,
877 // and we don't need to scan the new instruction, as it's a pairwise
878 // instruction, which we're not considering for further action anyway.
879 if (NextI == MergeMI)
880 NextI = next_nodbg(NextI, E);
881
882 unsigned Opc = I->getOpcode();
883 unsigned MergeMIOpc = MergeMI->getOpcode();
884 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
885 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
886 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
887 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
888
889 bool MergeForward = Flags.getMergeForward();
890 // Insert our new paired instruction after whichever of the paired
891 // instructions MergeForward indicates.
892 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
893 // Also based on MergeForward is from where we copy the base register operand
894 // so we get the flags compatible with the input code.
895 const MachineOperand &BaseRegOp =
896 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
897 : AArch64InstrInfo::getLdStBaseOp(*I);
898
899 // Which register is Rt and which is Rt2 depends on the offset order.
900 int64_t IOffsetInBytes =
901 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
902 int64_t MIOffsetInBytes =
904 MergeMIOffsetStride;
905 // Select final offset based on the offset order.
906 int64_t OffsetImm;
907 if (IOffsetInBytes > MIOffsetInBytes)
908 OffsetImm = MIOffsetInBytes;
909 else
910 OffsetImm = IOffsetInBytes;
911
912 int NewOpcode = getMatchingWideOpcode(Opc);
913 // Adjust final offset on scaled stores because the new instruction
914 // has a different scale.
915 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
916 int NewOffsetStride = TII->getMemScale(NewOpcode);
917 assert(((OffsetImm % NewOffsetStride) == 0) &&
918 "Offset should be a multiple of the store memory scale");
919 OffsetImm = OffsetImm / NewOffsetStride;
920 }
921
922 // Construct the new instruction.
923 DebugLoc DL = I->getDebugLoc();
924 MachineBasicBlock *MBB = I->getParent();
925 MachineInstrBuilder MIB;
926 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
927 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
928 .add(BaseRegOp)
929 .addImm(OffsetImm)
930 .cloneMergedMemRefs({&*I, &*MergeMI})
931 .setMIFlags(I->mergeFlagsWith(*MergeMI));
932 (void)MIB;
933
934 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
935 LLVM_DEBUG(I->print(dbgs()));
936 LLVM_DEBUG(dbgs() << " ");
937 LLVM_DEBUG(MergeMI->print(dbgs()));
938 LLVM_DEBUG(dbgs() << " with instruction:\n ");
939 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
940 LLVM_DEBUG(dbgs() << "\n");
941
942 // Erase the old instructions.
943 I->eraseFromParent();
944 MergeMI->eraseFromParent();
945 return NextI;
946}
947
948// Apply Fn to all instructions between MI and the beginning of the block, until
949// a def for DefReg is reached. Returns true, iff Fn returns true for all
950// visited instructions. Stop after visiting Limit iterations.
952 const TargetRegisterInfo *TRI, unsigned Limit,
953 std::function<bool(MachineInstr &, bool)> &Fn) {
954 auto MBB = MI.getParent();
955 for (MachineInstr &I :
956 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
957 if (!Limit)
958 return false;
959 --Limit;
960
961 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
962 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
963 TRI->regsOverlap(MOP.getReg(), DefReg);
964 });
965 if (!Fn(I, isDef))
966 return false;
967 if (isDef)
968 break;
969 }
970 return true;
971}
972
974 const TargetRegisterInfo *TRI) {
975
976 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
977 if (MOP.isReg() && MOP.isKill())
978 Units.removeReg(MOP.getReg());
979
980 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
981 if (MOP.isReg() && !MOP.isKill())
982 Units.addReg(MOP.getReg());
983}
984
985/// This function will add a new entry into the debugValueSubstitutions table
986/// when two instruction have been merged into a new one represented by \p
987/// MergedInstr.
989 unsigned InstrNumToSet,
990 MachineInstr &OriginalInstr,
991 MachineInstr &MergedInstr) {
992
993 // Figure out the Operand Index of the destination register of the
994 // OriginalInstr in the new MergedInstr.
995 auto Reg = OriginalInstr.getOperand(0).getReg();
996 unsigned OperandNo = 0;
997 bool RegFound = false;
998 for (const auto Op : MergedInstr.operands()) {
999 if (Op.getReg() == Reg) {
1000 RegFound = true;
1001 break;
1002 }
1003 OperandNo++;
1004 }
1005
1006 if (RegFound)
1007 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
1008 {InstrNumToSet, OperandNo});
1009}
1010
1012AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
1014 const LdStPairFlags &Flags) {
1015 MachineBasicBlock::iterator E = I->getParent()->end();
1017 // If NextI is the second of the two instructions to be merged, we need
1018 // to skip one further. Either way we merge will invalidate the iterator,
1019 // and we don't need to scan the new instruction, as it's a pairwise
1020 // instruction, which we're not considering for further action anyway.
1021 if (NextI == Paired)
1022 NextI = next_nodbg(NextI, E);
1023
1024 int SExtIdx = Flags.getSExtIdx();
1025 unsigned Opc =
1026 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1027 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1028 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1029
1030 bool MergeForward = Flags.getMergeForward();
1031
1032 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1033 if (RenameReg) {
1034 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1035 DefinedInBB.addReg(*RenameReg);
1036
1037 // Return the sub/super register for RenameReg, matching the size of
1038 // OriginalReg.
1039 auto GetMatchingSubReg =
1040 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1041 for (MCPhysReg SubOrSuper :
1042 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1043 if (C->contains(SubOrSuper))
1044 return SubOrSuper;
1045 }
1046 llvm_unreachable("Should have found matching sub or super register!");
1047 };
1048
1049 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1050 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1051 bool IsDef) {
1052 if (IsDef) {
1053 bool SeenDef = false;
1054 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1055 MachineOperand &MOP = MI.getOperand(OpIdx);
1056 // Rename the first explicit definition and all implicit
1057 // definitions matching RegToRename.
1058 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1059 (!MergeForward || !SeenDef ||
1060 (MOP.isDef() && MOP.isImplicit())) &&
1061 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1062 assert((MOP.isImplicit() ||
1063 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1064 "Need renamable operands");
1065 Register MatchingReg;
1066 if (const TargetRegisterClass *RC =
1067 MI.getRegClassConstraint(OpIdx, TII, TRI))
1068 MatchingReg = GetMatchingSubReg(RC);
1069 else {
1070 if (!isRewritableImplicitDef(MOP))
1071 continue;
1072 MatchingReg = GetMatchingSubReg(
1073 TRI->getMinimalPhysRegClass(MOP.getReg()));
1074 }
1075 MOP.setReg(MatchingReg);
1076 SeenDef = true;
1077 }
1078 }
1079 } else {
1080 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1081 MachineOperand &MOP = MI.getOperand(OpIdx);
1082 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1083 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1084 assert((MOP.isImplicit() ||
1085 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1086 "Need renamable operands");
1087 Register MatchingReg;
1088 if (const TargetRegisterClass *RC =
1089 MI.getRegClassConstraint(OpIdx, TII, TRI))
1090 MatchingReg = GetMatchingSubReg(RC);
1091 else
1092 MatchingReg = GetMatchingSubReg(
1093 TRI->getMinimalPhysRegClass(MOP.getReg()));
1094 assert(MatchingReg != AArch64::NoRegister &&
1095 "Cannot find matching regs for renaming");
1096 MOP.setReg(MatchingReg);
1097 }
1098 }
1099 }
1100 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1101 return true;
1102 };
1103 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1104 TRI, UINT32_MAX, UpdateMIs);
1105
1106#if !defined(NDEBUG)
1107 // For forward merging store:
1108 // Make sure the register used for renaming is not used between the
1109 // paired instructions. That would trash the content before the new
1110 // paired instruction.
1111 MCPhysReg RegToCheck = *RenameReg;
1112 // For backward merging load:
1113 // Make sure the register being renamed is not used between the
1114 // paired instructions. That would trash the content after the new
1115 // paired instruction.
1116 if (!MergeForward)
1117 RegToCheck = RegToRename;
1118 for (auto &MI :
1119 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1120 MergeForward ? std::next(I) : I,
1121 MergeForward ? std::next(Paired) : Paired))
1122 assert(all_of(MI.operands(),
1123 [this, RegToCheck](const MachineOperand &MOP) {
1124 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1125 MOP.isUndef() ||
1126 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1127 }) &&
1128 "Rename register used between paired instruction, trashing the "
1129 "content");
1130#endif
1131 }
1132
1133 // Insert our new paired instruction after whichever of the paired
1134 // instructions MergeForward indicates.
1135 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1136 // Also based on MergeForward is from where we copy the base register operand
1137 // so we get the flags compatible with the input code.
1138 const MachineOperand &BaseRegOp =
1139 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1140 : AArch64InstrInfo::getLdStBaseOp(*I);
1141
1143 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1144 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1145 if (IsUnscaled != PairedIsUnscaled) {
1146 // We're trying to pair instructions that differ in how they are scaled. If
1147 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1148 // the opposite (i.e., make Paired's offset unscaled).
1149 int MemSize = TII->getMemScale(*Paired);
1150 if (PairedIsUnscaled) {
1151 // If the unscaled offset isn't a multiple of the MemSize, we can't
1152 // pair the operations together.
1153 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1154 "Offset should be a multiple of the stride!");
1155 PairedOffset /= MemSize;
1156 } else {
1157 PairedOffset *= MemSize;
1158 }
1159 }
1160
1161 // Which register is Rt and which is Rt2 depends on the offset order.
1162 // However, for pre load/stores the Rt should be the one of the pre
1163 // load/store.
1164 MachineInstr *RtMI, *Rt2MI;
1165 if (Offset == PairedOffset + OffsetStride &&
1167 RtMI = &*Paired;
1168 Rt2MI = &*I;
1169 // Here we swapped the assumption made for SExtIdx.
1170 // I.e., we turn ldp I, Paired into ldp Paired, I.
1171 // Update the index accordingly.
1172 if (SExtIdx != -1)
1173 SExtIdx = (SExtIdx + 1) % 2;
1174 } else {
1175 RtMI = &*I;
1176 Rt2MI = &*Paired;
1177 }
1178 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1179 // Scale the immediate offset, if necessary.
1180 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1181 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1182 "Unscaled offset cannot be scaled.");
1183 OffsetImm /= TII->getMemScale(*RtMI);
1184 }
1185
1186 // Construct the new instruction.
1187 MachineInstrBuilder MIB;
1188 DebugLoc DL = I->getDebugLoc();
1189 MachineBasicBlock *MBB = I->getParent();
1190 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1191 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1192 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1193 // Kill flags may become invalid when moving stores for pairing.
1194 if (RegOp0.isUse()) {
1195 if (!MergeForward) {
1196 // Clear kill flags on store if moving upwards. Example:
1197 // STRWui kill %w0, ...
1198 // USE %w1
1199 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1200 // We are about to move the store of w1, so its kill flag may become
1201 // invalid; not the case for w0.
1202 // Since w1 is used between the stores, the kill flag on w1 is cleared
1203 // after merging.
1204 // STPWi kill %w0, %w1, ...
1205 // USE %w1
1206 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1207 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1208 PairedRegOp.setIsKill(false);
1209 } else {
1210 // Clear kill flags of the first stores register. Example:
1211 // STRWui %w1, ...
1212 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1213 // STRW %w0
1215 for (MachineInstr &MI :
1216 make_range(std::next(I->getIterator()), Paired->getIterator()))
1217 MI.clearRegisterKills(Reg, TRI);
1218 }
1219 }
1220
1221 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1222 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1223
1224 // Adds the pre-index operand for pre-indexed ld/st pairs.
1225 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1226 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1227
1228 MIB.add(RegOp0)
1229 .add(RegOp1)
1230 .add(BaseRegOp)
1231 .addImm(OffsetImm)
1232 .cloneMergedMemRefs({&*I, &*Paired})
1233 .setMIFlags(I->mergeFlagsWith(*Paired));
1234
1235 (void)MIB;
1236
1237 LLVM_DEBUG(
1238 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1239 LLVM_DEBUG(I->print(dbgs()));
1240 LLVM_DEBUG(dbgs() << " ");
1241 LLVM_DEBUG(Paired->print(dbgs()));
1242 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1243 if (SExtIdx != -1) {
1244 // Generate the sign extension for the proper result of the ldp.
1245 // I.e., with X1, that would be:
1246 // %w1 = KILL %w1, implicit-def %x1
1247 // %x1 = SBFMXri killed %x1, 0, 31
1248 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1249 // Right now, DstMO has the extended register, since it comes from an
1250 // extended opcode.
1251 Register DstRegX = DstMO.getReg();
1252 // Get the W variant of that register.
1253 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1254 // Update the result of LDP to use the W instead of the X variant.
1255 DstMO.setReg(DstRegW);
1256 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1257 LLVM_DEBUG(dbgs() << "\n");
1258 // Make the machine verifier happy by providing a definition for
1259 // the X register.
1260 // Insert this definition right after the generated LDP, i.e., before
1261 // InsertionPoint.
1262 MachineInstrBuilder MIBKill =
1263 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1264 .addReg(DstRegW)
1265 .addReg(DstRegX, RegState::Define);
1266 MIBKill->getOperand(2).setImplicit();
1267 // Create the sign extension.
1268 MachineInstrBuilder MIBSXTW =
1269 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1270 .addReg(DstRegX)
1271 .addImm(0)
1272 .addImm(31);
1273 (void)MIBSXTW;
1274
1275 // In the case of a sign-extend, where we have something like:
1276 // debugValueSubstitutions:[]
1277 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1278 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1279 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1280 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1281
1282 // It will be converted to:
1283 // debugValueSubstitutions:[]
1284 // $w0, $w1 = LDPWi $x0, 0
1285 // $w0 = KILL $w0, implicit-def $x0
1286 // $x0 = SBFMXri $x0, 0, 31
1287 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1288 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1289
1290 // We want the final result to look like:
1291 // debugValueSubstitutions:
1292 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1293 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1294 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1295 // $w0 = KILL $w0, implicit-def $x0
1296 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1297 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1298 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1299
1300 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1301 // instruction contains the final value we care about we give it a new
1302 // debug-instr-number 3. Whereas, $w1 contains the final value that we care
1303 // about, therefore the LDP instruction is also given a new
1304 // debug-instr-number 4. We have to add these substitutions to the
1305 // debugValueSubstitutions table. However, we also have to ensure that the
1306 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1307 // $w1 is the second operand of the LDP instruction.
1308
1309 if (I->peekDebugInstrNum()) {
1310 // If I is the instruction which got sign extended and has a
1311 // debug-instr-number, give the SBFMXri instruction a new
1312 // debug-instr-number, and update the debugValueSubstitutions table with
1313 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1314 // instruction a new debug-instr-number, and update the
1315 // debugValueSubstitutions table with the new debug-instr-number and
1316 // OpIndex pair.
1317 unsigned NewInstrNum;
1318 if (DstRegX == I->getOperand(0).getReg()) {
1319 NewInstrNum = MIBSXTW->getDebugInstrNum();
1320 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1321 *MIBSXTW);
1322 } else {
1323 NewInstrNum = MIB->getDebugInstrNum();
1324 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1325 }
1326 }
1327 if (Paired->peekDebugInstrNum()) {
1328 // If Paired is the instruction which got sign extended and has a
1329 // debug-instr-number, give the SBFMXri instruction a new
1330 // debug-instr-number, and update the debugValueSubstitutions table with
1331 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1332 // instruction a new debug-instr-number, and update the
1333 // debugValueSubstitutions table with the new debug-instr-number and
1334 // OpIndex pair.
1335 unsigned NewInstrNum;
1336 if (DstRegX == Paired->getOperand(0).getReg()) {
1337 NewInstrNum = MIBSXTW->getDebugInstrNum();
1338 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1339 *MIBSXTW);
1340 } else {
1341 NewInstrNum = MIB->getDebugInstrNum();
1342 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1343 *MIB);
1344 }
1345 }
1346
1347 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1348 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1349 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1350 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1351 // variant of the registers.
1352 MachineOperand &MOp0 = MIB->getOperand(0);
1353 MachineOperand &MOp1 = MIB->getOperand(1);
1354 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1355 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1356 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1357 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1358 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1359 } else {
1360
1361 // In the case that the merge doesn't result in a sign-extend, if we have
1362 // something like:
1363 // debugValueSubstitutions:[]
1364 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1365 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1366 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1367 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1368
1369 // It will be converted to:
1370 // debugValueSubstitutions: []
1371 // $x0, $x1 = LDPXi $x0, 0
1372 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1373 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1374
1375 // We want the final result to look like:
1376 // debugValueSubstitutions:
1377 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1378 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1379 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1380 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1381 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1382
1383 // Here all that needs to be done is, that the LDP instruction needs to be
1384 // updated with a new debug-instr-number, we then need to add entries into
1385 // the debugSubstitutions table to map the old instr-refs to the new ones.
1386
1387 // Assign new DebugInstrNum to the Paired instruction.
1388 if (I->peekDebugInstrNum()) {
1389 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1390 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1391 *MIB);
1392 }
1393 if (Paired->peekDebugInstrNum()) {
1394 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1395 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1396 *MIB);
1397 }
1398
1399 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1400 }
1401 LLVM_DEBUG(dbgs() << "\n");
1402
1403 if (MergeForward)
1404 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1405 if (MOP.isReg() && MOP.isKill())
1406 DefinedInBB.addReg(MOP.getReg());
1407
1408 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1409 // only copies implicit defs and makes sure that each operand is only added
1410 // once in case of duplicates.
1411 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1413 SmallSetVector<Register, 4> Ops;
1414 for (const MachineOperand &MO :
1415 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1416 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1417 Ops.insert(MO.getReg());
1418 for (const MachineOperand &MO :
1419 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1420 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1421 Ops.insert(MO.getReg());
1422 for (auto Op : Ops)
1423 MIB.addDef(Op, RegState::Implicit);
1424 };
1425 CopyImplicitOps(I, Paired);
1426
1427 // Erase the old instructions.
1428 I->eraseFromParent();
1429 Paired->eraseFromParent();
1430
1431 return NextI;
1432}
1433
1435AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1438 next_nodbg(LoadI, LoadI->getParent()->end());
1439
1440 int LoadSize = TII->getMemScale(*LoadI);
1441 int StoreSize = TII->getMemScale(*StoreI);
1442 Register LdRt = getLdStRegOp(*LoadI).getReg();
1443 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1444 Register StRt = getLdStRegOp(*StoreI).getReg();
1445 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1446
1447 assert((IsStoreXReg ||
1448 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1449 "Unexpected RegClass");
1450
1451 MachineInstr *BitExtMI;
1452 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1453 // Remove the load, if the destination register of the loads is the same
1454 // register for stored value.
1455 if (StRt == LdRt && LoadSize == 8) {
1456 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1457 LoadI->getIterator())) {
1458 if (MI.killsRegister(StRt, TRI)) {
1459 MI.clearRegisterKills(StRt, TRI);
1460 break;
1461 }
1462 }
1463 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1464 LLVM_DEBUG(LoadI->print(dbgs()));
1465 LLVM_DEBUG(dbgs() << "\n");
1466 LoadI->eraseFromParent();
1467 return NextI;
1468 }
1469 // Replace the load with a mov if the load and store are in the same size.
1470 BitExtMI =
1471 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1472 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1473 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1474 .add(StMO)
1476 .setMIFlags(LoadI->getFlags());
1477 } else {
1478 // FIXME: Currently we disable this transformation in big-endian targets as
1479 // performance and correctness are verified only in little-endian.
1480 if (!Subtarget->isLittleEndian())
1481 return NextI;
1482 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1483 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1484 "Unsupported ld/st match");
1485 assert(LoadSize <= StoreSize && "Invalid load size");
1486 int UnscaledLdOffset =
1487 IsUnscaled
1489 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1490 int UnscaledStOffset =
1491 IsUnscaled
1493 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1494 int Width = LoadSize * 8;
1495 Register DestReg =
1496 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1497 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1498 : LdRt;
1499
1500 assert((UnscaledLdOffset >= UnscaledStOffset &&
1501 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1502 "Invalid offset");
1503
1504 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1505 int Imms = Immr + Width - 1;
1506 if (UnscaledLdOffset == UnscaledStOffset) {
1507 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1508 | ((Immr) << 6) // immr
1509 | ((Imms) << 0) // imms
1510 ;
1511
1512 BitExtMI =
1513 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1514 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1515 DestReg)
1516 .add(StMO)
1517 .addImm(AndMaskEncoded)
1518 .setMIFlags(LoadI->getFlags());
1519 } else if (IsStoreXReg && Imms == 31) {
1520 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1521 // instruction.
1522 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1523 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1524 TII->get(AArch64::UBFMWri),
1525 TRI->getSubReg(DestReg, AArch64::sub_32))
1526 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1527 .addImm(Immr)
1528 .addImm(Imms)
1529 .setMIFlags(LoadI->getFlags());
1530 } else {
1531 BitExtMI =
1532 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1533 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1534 DestReg)
1535 .add(StMO)
1536 .addImm(Immr)
1537 .addImm(Imms)
1538 .setMIFlags(LoadI->getFlags());
1539 }
1540 }
1541
1542 // Clear kill flags between store and load.
1543 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1544 BitExtMI->getIterator()))
1545 if (MI.killsRegister(StRt, TRI)) {
1546 MI.clearRegisterKills(StRt, TRI);
1547 break;
1548 }
1549
1550 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1551 LLVM_DEBUG(StoreI->print(dbgs()));
1552 LLVM_DEBUG(dbgs() << " ");
1553 LLVM_DEBUG(LoadI->print(dbgs()));
1554 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1555 LLVM_DEBUG(StoreI->print(dbgs()));
1556 LLVM_DEBUG(dbgs() << " ");
1557 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1558 LLVM_DEBUG(dbgs() << "\n");
1559
1560 // Erase the old instructions.
1561 LoadI->eraseFromParent();
1562 return NextI;
1563}
1564
/// Returns true if \p Offset fits in the signed 7-bit element-scaled
/// immediate of a paired load/store instruction.
///
/// For unscaled opcodes the incoming offset is in bytes; it is only pairable
/// when it lands exactly on an element boundary, in which case it is first
/// converted into element units.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  int ElementOffset = Offset;
  if (IsUnscaled) {
    // A byte offset that is not a multiple of the stride can never be
    // expressed as a scaled pair offset.
    if (ElementOffset % OffsetStride != 0)
      return false;
    ElementOffset /= OffsetStride;
  }
  // Paired instructions encode a signed 7-bit element offset: [-64, 63].
  return ElementOffset >= -64 && ElementOffset <= 63;
}
1577
// Round \p Num up to the next multiple of \p PowOf2 (which must be a power
// of 2). Specialized for signed ints to avoid a C-style cast from uint64_t
// when using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
1585
1586static bool mayAlias(MachineInstr &MIa,
1588 AliasAnalysis *AA) {
1589 for (MachineInstr *MIb : MemInsns) {
1590 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1591 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1592 return true;
1593 }
1594 }
1595
1596 LLVM_DEBUG(dbgs() << "No aliases found\n");
1597 return false;
1598}
1599
1600bool AArch64LoadStoreOpt::findMatchingStore(
1601 MachineBasicBlock::iterator I, unsigned Limit,
1603 MachineBasicBlock::iterator B = I->getParent()->begin();
1605 MachineInstr &LoadMI = *I;
1607
1608 // If the load is the first instruction in the block, there's obviously
1609 // not any matching store.
1610 if (MBBI == B)
1611 return false;
1612
1613 // Track which register units have been modified and used between the first
1614 // insn and the second insn.
1615 ModifiedRegUnits.clear();
1616 UsedRegUnits.clear();
1617
1618 unsigned Count = 0;
1619 do {
1620 MBBI = prev_nodbg(MBBI, B);
1621 MachineInstr &MI = *MBBI;
1622
1623 // Don't count transient instructions towards the search limit since there
1624 // may be different numbers of them if e.g. debug information is present.
1625 if (!MI.isTransient())
1626 ++Count;
1627
1628 // If the load instruction reads directly from the address to which the
1629 // store instruction writes and the stored value is not modified, we can
1630 // promote the load. Since we do not handle stores with pre-/post-index,
1631 // it's unnecessary to check if BaseReg is modified by the store itself.
1632 // Also we can't handle stores without an immediate offset operand,
1633 // while the operand might be the address for a global variable.
1634 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1637 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1638 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1639 StoreI = MBBI;
1640 return true;
1641 }
1642
1643 if (MI.isCall())
1644 return false;
1645
1646 // Update modified / uses register units.
1647 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1648
1649 // Otherwise, if the base register is modified, we have no match, so
1650 // return early.
1651 if (!ModifiedRegUnits.available(BaseReg))
1652 return false;
1653
1654 // If we encounter a store aliased with the load, return early.
1655 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1656 return false;
1657 } while (MBBI != B && Count < Limit);
1658 return false;
1659}
1660
1661static bool needsWinCFI(const MachineFunction *MF) {
1662 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1664}
1665
1666// Returns true if FirstMI and MI are candidates for merging or pairing.
1667// Otherwise, returns false.
1669 LdStPairFlags &Flags,
1670 const AArch64InstrInfo *TII) {
1671 // If this is volatile or if pairing is suppressed, not a candidate.
1672 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1673 return false;
1674
1675 // We should have already checked FirstMI for pair suppression and volatility.
1676 assert(!FirstMI.hasOrderedMemoryRef() &&
1677 !TII->isLdStPairSuppressed(FirstMI) &&
1678 "FirstMI shouldn't get here if either of these checks are true.");
1679
1680 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1682 return false;
1683
1684 unsigned OpcA = FirstMI.getOpcode();
1685 unsigned OpcB = MI.getOpcode();
1686
1687 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1688 if (OpcA == OpcB)
1689 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1690
1691 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1692 // allow pairing them with other instructions.
1693 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1694 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1695 return false;
1696
1697 // Two pre ld/st of different opcodes cannot be merged either
1699 return false;
1700
1701 // Try to match a sign-extended load/store with a zero-extended load/store.
1702 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1703 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1704 assert(IsValidLdStrOpc &&
1705 "Given Opc should be a Load or Store with an immediate");
1706 // OpcA will be the first instruction in the pair.
1707 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1708 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1709 return true;
1710 }
1711
1712 // If the second instruction isn't even a mergable/pairable load/store, bail
1713 // out.
1714 if (!PairIsValidLdStrOpc)
1715 return false;
1716
1717 // Narrow stores do not have a matching pair opcodes, so constrain their
1718 // merging to zero stores.
1719 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1720 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1721 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1722 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1723
1724 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1725 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1726 // are candidate pairs that can be merged.
1727 if (isPreLdStPairCandidate(FirstMI, MI))
1728 return true;
1729
1730 // Try to match an unscaled load/store with a scaled load/store.
1731 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1733
1734 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1735}
1736
1737static bool canRenameMOP(const MachineOperand &MOP,
1738 const TargetRegisterInfo *TRI) {
1739 if (MOP.isReg()) {
1740 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1741 // Renaming registers with multiple disjunct sub-registers (e.g. the
1742 // result of a LD3) means that all sub-registers are renamed, potentially
1743 // impacting other instructions we did not check. Bail out.
1744 // Note that this relies on the structure of the AArch64 register file. In
1745 // particular, a subregister cannot be written without overwriting the
1746 // whole register.
1747 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1748 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1749 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1750 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1751 LLVM_DEBUG(
1752 dbgs()
1753 << " Cannot rename operands with multiple disjunct subregisters ("
1754 << MOP << ")\n");
1755 return false;
1756 }
1757
1758 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1759 // them must be known. For example, in ORRWrs the implicit-def
1760 // corresponds to the result register.
1761 if (MOP.isImplicit() && MOP.isDef()) {
1762 if (!isRewritableImplicitDef(MOP))
1763 return false;
1764 return TRI->isSuperOrSubRegisterEq(
1765 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1766 }
1767 }
1768 return MOP.isImplicit() ||
1769 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1770}
1771
1772static bool
1775 const TargetRegisterInfo *TRI) {
1776 if (!FirstMI.mayStore())
1777 return false;
1778
1779 // Check if we can find an unused register which we can use to rename
1780 // the register used by the first load/store.
1781
1782 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1783 // For now, we only rename if the store operand gets killed at the store.
1784 if (!getLdStRegOp(FirstMI).isKill() &&
1785 !any_of(FirstMI.operands(),
1786 [TRI, RegToRename](const MachineOperand &MOP) {
1787 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1788 MOP.isImplicit() && MOP.isKill() &&
1789 TRI->regsOverlap(RegToRename, MOP.getReg());
1790 })) {
1791 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1792 return false;
1793 }
1794
1795 bool FoundDef = false;
1796
1797 // For each instruction between FirstMI and the previous def for RegToRename,
1798 // we
1799 // * check if we can rename RegToRename in this instruction
1800 // * collect the registers used and required register classes for RegToRename.
1801 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1802 bool IsDef) {
1803 LLVM_DEBUG(dbgs() << "Checking " << MI);
1804 // Currently we do not try to rename across frame-setup instructions.
1805 if (MI.getFlag(MachineInstr::FrameSetup)) {
1806 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1807 << "currently\n");
1808 return false;
1809 }
1810
1811 UsedInBetween.accumulate(MI);
1812
1813 // For a definition, check that we can rename the definition and exit the
1814 // loop.
1815 FoundDef = IsDef;
1816
1817 // For defs, check if we can rename the first def of RegToRename.
1818 if (FoundDef) {
1819 // For some pseudo instructions, we might not generate code in the end
1820 // (e.g. KILL) and we would end up without a correct def for the rename
1821 // register.
1822 // TODO: This might be overly conservative and we could handle those cases
1823 // in multiple ways:
1824 // 1. Insert an extra copy, to materialize the def.
1825 // 2. Skip pseudo-defs until we find an non-pseudo def.
1826 if (MI.isPseudo()) {
1827 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1828 return false;
1829 }
1830
1831 for (auto &MOP : MI.operands()) {
1832 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1833 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1834 continue;
1835 if (!canRenameMOP(MOP, TRI)) {
1836 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1837 return false;
1838 }
1839 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1840 }
1841 return true;
1842 } else {
1843 for (auto &MOP : MI.operands()) {
1844 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1845 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1846 continue;
1847
1848 if (!canRenameMOP(MOP, TRI)) {
1849 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1850 return false;
1851 }
1852 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1853 }
1854 }
1855 return true;
1856 };
1857
1858 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1859 return false;
1860
1861 if (!FoundDef) {
1862 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1863 return false;
1864 }
1865 return true;
1866}
1867
// NOTE(review): Doxygen extraction dropped original lines 1877 and 1880 here
// (the `static bool canRenameUntilSecondLoad(` signature line and, presumably,
// the `SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses`
// parameter that the body inserts into) -- recover them from upstream before
// editing this definition.
1868 // We want to merge the second load into the first by rewriting the usages of
1869 // the same reg between first (incl.) and second (excl.). We don't need to care
1870 // about any insns before FirstLoad or after SecondLoad.
1871 // 1. The second load writes new value into the same reg.
1872 // - The renaming is impossible to impact later use of the reg.
1873 // - The second load always trash the value written by the first load which
1874 // means the reg must be killed before the second load.
1875 // 2. The first load must be a def for the same reg so we don't need to look
1876 // into anything before it.
1878     MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1879     LiveRegUnits &UsedInBetween,
1881     const TargetRegisterInfo *TRI) {
// Pseudo first loads (e.g. KILL) may emit no code, so there would be no real
// def for the renamed register; bail out.
1882   if (FirstLoad.isPseudo())
1883     return false;
1884 
1885   UsedInBetween.accumulate(FirstLoad);
1886   auto RegToRename = getLdStRegOp(FirstLoad).getReg();
// Walk [FirstLoad, SecondLoad) and require every operand overlapping
// RegToRename to be renamable; record the register classes a rename register
// must satisfy along the way.
1887   bool Success = std::all_of(
1888       FirstLoad.getIterator(), SecondLoad.getIterator(),
1889       [&](MachineInstr &MI) {
1890         LLVM_DEBUG(dbgs() << "Checking " << MI);
1891         // Currently we do not try to rename across frame-setup instructions.
1892         if (MI.getFlag(MachineInstr::FrameSetup)) {
1893           LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1894                             << "currently\n");
1895           return false;
1896         }
1897 
1898         for (auto &MOP : MI.operands()) {
1899           if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1900               !TRI->regsOverlap(MOP.getReg(), RegToRename))
1901             continue;
1902           if (!canRenameMOP(MOP, TRI)) {
1903             LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1904             return false;
1905           }
1906           RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1907         }
1908 
1909         return true;
1910       });
1911   return Success;
1912 }
1913
// NOTE(review): Doxygen extraction dropped original lines 1924 and 1926 here
// (presumably the `SmallPtrSetImpl<...> &RequiredClasses` parameter and the
// declaration of `RegInfo` -- likely `const MachineRegisterInfo &RegInfo =
// MF.getRegInfo();` since `RegInfo.isReserved(PR)` is queried below) --
// confirm against upstream before editing.
1914 // Check if we can find a physical register for renaming \p Reg. This register
1915 // must:
1916 // * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1917 // defined registers up to the point where the renamed register will be used,
1918 // * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1919 // registers in the range the rename register will be used,
1920 // * is available in all used register classes (checked using RequiredClasses).
1921 static std::optional<MCPhysReg> tryToFindRegisterToRename(
1922     const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1923     LiveRegUnits &UsedInBetween,
1925     const TargetRegisterInfo *TRI) {
1927 
1928   // Checks if any sub- or super-register of PR is callee saved.
1929   auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1930     return any_of(TRI->sub_and_superregs_inclusive(PR),
1931                   [&MF, TRI](MCPhysReg SubOrSuper) {
1932                     return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1933                   });
1934   };
1935 
1936   // Check if PR or one of its sub- or super-registers can be used for all
1937   // required register classes.
1938   auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1939     return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1940       return any_of(
1941           TRI->sub_and_superregs_inclusive(PR),
1942           [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1943     });
1944   };
1945 
// Linear scan of Reg's minimal class: the first register that is unused,
// undefined-so-far, unreserved, not callee-saved, and usable for every
// required class wins. It is immediately marked defined in DefinedInBB so a
// later query cannot hand out the same register twice.
1946   auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1947   for (const MCPhysReg &PR : *RegClass) {
1948     if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1949         !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1950         CanBeUsedForAllClasses(PR)) {
1951       DefinedInBB.addReg(PR);
1952       LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1953                         << "\n");
1954       return {PR};
1955     }
1956   }
1957   LLVM_DEBUG(dbgs() << "No rename register found from "
1958                     << TRI->getRegClassName(RegClass) << "\n");
1959   return std::nullopt;
1960 }
1961
// NOTE(review): Doxygen extraction dropped original line 1968 here (presumably
// the `SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses`
// parameter, which is forwarded to the helpers below) -- confirm against
// upstream before editing.
1962 // For store pairs: returns a register from FirstMI to the beginning of the
1963 // block that can be renamed.
1964 // For load pairs: returns a register from FirstMI to MI that can be renamed.
1965 static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1966     std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1967     Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1969     const TargetRegisterInfo *TRI) {
1970   std::optional<MCPhysReg> RenameReg;
// Renaming can be suppressed via a debug counter for bisection, and requires
// accurate liveness information to be sound.
1971   if (!DebugCounter::shouldExecute(RegRenamingCounter))
1972     return RenameReg;
1973 
1974   auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1975   MachineFunction &MF = *FirstMI.getParent()->getParent();
1976   if (!RegClass || !MF.getRegInfo().tracksLiveness())
1977     return RenameReg;
1978 
1979   const bool IsLoad = FirstMI.mayLoad();
1980 
// Feasibility is computed at most once and cached in MaybeCanRename; loads and
// stores use different scans (up to the second load vs. up to the last def).
1981   if (!MaybeCanRename) {
1982     if (IsLoad)
1983       MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1984                                                  RequiredClasses, TRI)};
1985     else
1986       MaybeCanRename = {
1987           canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1988   }
1989 
1990   if (*MaybeCanRename) {
1991     RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1992                                           RequiredClasses, TRI);
1993   }
1994   return RenameReg;
1995 }
1996
// NOTE(review): Doxygen extraction dropped several original lines inside this
// definition (1999 return-type line, 2004 `MBBI` init, 2012-2013 base-reg and
// offset inits, 2050, 2058-2059 `MIBaseReg`/`MIOffset` inits, 2091, 2093,
// 2099, 2150, 2167). Several variables used below (`MBBI`, `BaseReg`,
// `Offset`, `MIBaseReg`, `MIOffset`, `DefinedInBB`) are declared on those
// missing lines -- recover them from upstream before editing this function.
1997 /// Scan the instructions looking for a load/store that can be combined with the
1998 /// current instruction into a wider equivalent or a load/store pair.
2000 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
2001                                       LdStPairFlags &Flags, unsigned Limit,
2002                                       bool FindNarrowMerge) {
2003   MachineBasicBlock::iterator E = I->getParent()->end();
2005   MachineBasicBlock::iterator MBBIWithRenameReg;
2006   MachineInstr &FirstMI = *I;
2007   MBBI = next_nodbg(MBBI, E);
2008 
2009   bool MayLoad = FirstMI.mayLoad();
2010   bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
2011   Register Reg = getLdStRegOp(FirstMI).getReg();
2014   int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
2015   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
2016 
// MaybeCanRename caches the (expensive) renaming-feasibility scan so it runs
// at most once per FirstMI; pre-seeding it with false disables renaming.
2017   std::optional<bool> MaybeCanRename;
2018   if (!EnableRenaming)
2019     MaybeCanRename = {false};
2020 
2021   SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2022   LiveRegUnits UsedInBetween;
2023   UsedInBetween.init(*TRI);
2024 
2025   Flags.clearRenameReg();
2026 
2027   // Track which register units have been modified and used between the first
2028   // insn (inclusive) and the second insn.
2029   ModifiedRegUnits.clear();
2030   UsedRegUnits.clear();
2031 
2032   // Remember any instructions that read/write memory between FirstMI and MI.
2033   SmallVector<MachineInstr *, 4> MemInsns;
2034 
2035   LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2036   for (unsigned Count = 0; MBBI != E && Count < Limit;
2037        MBBI = next_nodbg(MBBI, E)) {
2038     MachineInstr &MI = *MBBI;
2039     LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2040 
2041     UsedInBetween.accumulate(MI);
2042 
2043     // Don't count transient instructions towards the search limit since there
2044     // may be different numbers of them if e.g. debug information is present.
2045     if (!MI.isTransient())
2046       ++Count;
2047 
2048     Flags.setSExtIdx(-1);
2049     if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2051       assert(MI.mayLoadOrStore() && "Expected memory operation.");
2052       // If we've found another instruction with the same opcode, check to see
2053       // if the base and offset are compatible with our starting instruction.
2054       // These instructions all have scaled immediate operands, so we just
2055       // check for +1/-1. Make sure to check the new instruction offset is
2056       // actually an immediate and not a symbolic reference destined for
2057       // a relocation.
2060       bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2061       if (IsUnscaled != MIIsUnscaled) {
2062         // We're trying to pair instructions that differ in how they are scaled.
2063         // If FirstMI is scaled then scale the offset of MI accordingly.
2064         // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2065         int MemSize = TII->getMemScale(MI);
2066         if (MIIsUnscaled) {
2067           // If the unscaled offset isn't a multiple of the MemSize, we can't
2068           // pair the operations together: bail and keep looking.
2069           if (MIOffset % MemSize) {
2070             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2071                                               UsedRegUnits, TRI);
2072             MemInsns.push_back(&MI);
2073             continue;
2074           }
2075           MIOffset /= MemSize;
2076         } else {
2077           MIOffset *= MemSize;
2078         }
2079       }
2080 
2081       bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2082 
2083       if (BaseReg == MIBaseReg) {
2084         // If the offset of the second ld/st is not equal to the size of the
2085         // destination register it can’t be paired with a pre-index ld/st
2086         // pair. Additionally if the base reg is used or modified the operations
2087         // can't be paired: bail and keep looking.
2088         if (IsPreLdSt) {
2089           bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2090           bool IsBaseRegUsed = !UsedRegUnits.available(
2092           bool IsBaseRegModified = !ModifiedRegUnits.available(
2094           // If the stored value and the address of the second instruction is
2095           // the same, it needs to be using the updated register and therefore
2096           // it must not be folded.
2097           bool IsMIRegTheSame =
2098               TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2100           if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2101               IsMIRegTheSame) {
2102             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2103                                               UsedRegUnits, TRI);
2104             MemInsns.push_back(&MI);
2105             continue;
2106           }
2107         } else {
2108           if ((Offset != MIOffset + OffsetStride) &&
2109               (Offset + OffsetStride != MIOffset)) {
2110             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2111                                               UsedRegUnits, TRI);
2112             MemInsns.push_back(&MI);
2113             continue;
2114           }
2115         }
2116 
2117         int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2118         if (FindNarrowMerge) {
2119           // If the alignment requirements of the scaled wide load/store
2120           // instruction can't express the offset of the scaled narrow input,
2121           // bail and keep looking. For promotable zero stores, allow only when
2122           // the stored value is the same (i.e., WZR).
2123           if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2124               (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2125             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2126                                               UsedRegUnits, TRI);
2127             MemInsns.push_back(&MI);
2128             continue;
2129           }
2130         } else {
2131           // Pairwise instructions have a 7-bit signed offset field. Single
2132           // insns have a 12-bit unsigned offset field. If the resultant
2133           // immediate offset of merging these instructions is out of range for
2134           // a pairwise instruction, bail and keep looking.
2135           if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2136             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2137                                               UsedRegUnits, TRI);
2138             MemInsns.push_back(&MI);
2139             LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2140                               << "keep looking.\n");
2141             continue;
2142           }
2143           // If the alignment requirements of the paired (scaled) instruction
2144           // can't express the offset of the unscaled input, bail and keep
2145           // looking.
2146           if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2147             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2148                                               UsedRegUnits, TRI);
2149             MemInsns.push_back(&MI);
2151                        << "Offset doesn't fit due to alignment requirements, "
2152                        << "keep looking.\n");
2153             continue;
2154           }
2155         }
2156 
2157         // If the BaseReg has been modified, then we cannot do the optimization.
2158         // For example, in the following pattern
2159         // ldr x1 [x2]
2160         // ldr x2 [x3]
2161         // ldr x4 [x2, #8],
2162         // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2163         if (!ModifiedRegUnits.available(BaseReg))
2164           return E;
2165 
2166         const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2168 
2169         // If the Rt of the second instruction (destination register of the
2170         // load) was not modified or used between the two instructions and none
2171         // of the instructions between the second and first alias with the
2172         // second, we can combine the second into the first.
2173         bool RtNotModified =
2174             ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2175         bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2176                            !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2177 
2178         LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2179                           << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2180                           << (RtNotModified ? "true" : "false") << "\n"
2181                           << "Reg '" << getLdStRegOp(MI) << "' not used: "
2182                           << (RtNotUsed ? "true" : "false") << "\n");
2183 
2184         if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2185           // For pairs loading into the same reg, try to find a renaming
2186           // opportunity to allow the renaming of Reg between FirstMI and MI
2187           // and combine MI into FirstMI; otherwise bail and keep looking.
2188           if (SameLoadReg) {
2189             std::optional<MCPhysReg> RenameReg =
2190                 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2191                                                 Reg, DefinedInBB, UsedInBetween,
2192                                                 RequiredClasses, TRI);
2193             if (!RenameReg) {
2194               LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2195                                                 UsedRegUnits, TRI);
2196               MemInsns.push_back(&MI);
2197               LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2198                                 << "keep looking.\n");
2199               continue;
2200             }
2201             Flags.setRenameReg(*RenameReg);
2202           }
2203 
2204           Flags.setMergeForward(false);
2205           if (!SameLoadReg)
2206             Flags.clearRenameReg();
2207           return MBBI;
2208         }
2209 
2210         // Likewise, if the Rt of the first instruction is not modified or used
2211         // between the two instructions and none of the instructions between the
2212         // first and the second alias with the first, we can combine the first
2213         // into the second.
2214         RtNotModified = !(
2215             MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2216 
2217         LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2218                           << "Reg '" << getLdStRegOp(FirstMI)
2219                           << "' not modified: "
2220                           << (RtNotModified ? "true" : "false") << "\n");
2221 
2222         if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2223           if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2224             Flags.setMergeForward(true);
2225             Flags.clearRenameReg();
2226             return MBBI;
2227           }
2228 
// Forward merge is still possible by renaming FirstMI's register; remember
// this position but keep scanning -- the rename result is only committed at
// the end (see the Flags.getRenameReg() check below).
2229           std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2230               MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2231               RequiredClasses, TRI);
2232           if (RenameReg) {
2233             Flags.setMergeForward(true);
2234             Flags.setRenameReg(*RenameReg);
2235             MBBIWithRenameReg = MBBI;
2236           }
2237         }
2238         LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2239                           << "interference in between, keep looking.\n");
2240       }
2241     }
2242 
2243     if (Flags.getRenameReg())
2244       return MBBIWithRenameReg;
2245 
2246     // If the instruction wasn't a matching load or store. Stop searching if we
2247     // encounter a call instruction that might modify memory.
2248     if (MI.isCall()) {
2249       LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2250       return E;
2251     }
2252 
2253     // Update modified / uses register units.
2254     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2255 
2256     // Otherwise, if the base register is modified, we have no match, so
2257     // return early.
2258     if (!ModifiedRegUnits.available(BaseReg)) {
2259       LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2260       return E;
2261     }
2262 
2263     // Update list of instructions that read/write memory.
2264     if (MI.mayLoadOrStore())
2265       MemInsns.push_back(&MI);
2266   }
2267   return E;
2268 }
2269
// NOTE(review): Doxygen extraction dropped this function's signature (original
// lines 2270-2271; upstream it is `static MachineBasicBlock::iterator
// maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)` --
// confirm) and the `case` labels of the switch below (original lines
// 2287-2288, the CFA-offset-related CFI operations). Recover from upstream
// before editing.
//
// Given an SP add/sub and the instruction following it, returns the iterator
// of an attached CFA-related CFI instruction that must move together with the
// update, or the block-end iterator if there is none.
2272   assert((MI.getOpcode() == AArch64::SUBXri ||
2273           MI.getOpcode() == AArch64::ADDXri) &&
2274          "Expected a register update instruction");
2275   auto End = MI.getParent()->end();
// Only SP updates carrying a FrameSetup/FrameDestroy flag and immediately
// followed by a CFI_INSTRUCTION are of interest.
2276   if (MaybeCFI == End ||
2277       MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2278       !(MI.getFlag(MachineInstr::FrameSetup) ||
2279         MI.getFlag(MachineInstr::FrameDestroy)) ||
2280       MI.getOperand(0).getReg() != AArch64::SP)
2281     return End;
2282 
2283   const MachineFunction &MF = *MI.getParent()->getParent();
2284   unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2285   const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2286   switch (CFI.getOperation()) {
2289     return MaybeCFI;
2290   default:
2291     return End;
2292   }
2293 }
2294
// NOTE(review): Doxygen extraction dropped several original lines inside this
// definition (2296 parameter line with `I` and `Update`, 2302 `NextI` init,
// 2340 the `: getPostIndexedOpcode(...)` arm of the conditional, 2344 the
// `if (!AArch64InstrInfo::isPairedLdSt(*I))` guard, and 2350/2361 the base
// register `.add(...)` operands). Recover from upstream before editing.
//
// Folds a base-register add/sub (`Update`) into the load/store at `I`,
// producing a pre- or post-indexed form, and erases both originals. Returns
// the iterator to continue at, or std::nullopt if CFI constraints block the
// merge.
2295 std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2297     bool IsForward, bool IsPreIdx, bool MergeEither) {
2298   assert((Update->getOpcode() == AArch64::ADDXri ||
2299           Update->getOpcode() == AArch64::SUBXri) &&
2300          "Unexpected base register update instruction to merge!");
2301   MachineBasicBlock::iterator E = I->getParent()->end();
2303 
2304   // If updating the SP and the following instruction is CFA offset related CFI,
2305   // make sure the CFI follows the SP update either by merging at the location
2306   // of the update or by moving the CFI after the merged instruction. If unable
2307   // to do so, bail.
2308   MachineBasicBlock::iterator InsertPt = I;
2309   if (IsForward) {
2310     assert(IsPreIdx);
2311     if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2312       if (MergeEither) {
2313         InsertPt = Update;
2314       } else {
2315         // Take care not to reorder CFIs.
2316         if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2317               return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2318             }))
2319           return std::nullopt;
2320 
2321         MachineBasicBlock *MBB = InsertPt->getParent();
2322         MBB->splice(std::next(InsertPt), MBB, CFI);
2323       }
2324     }
2325   }
2326 
2327   // Return the instruction following the merged instruction, which is
2328   // the instruction following our unmerged load. Unless that's the add/sub
2329   // instruction we're merging, in which case it's the one after that.
2330   if (NextI == Update)
2331     NextI = next_nodbg(NextI, E);
2332 
2333   int Value = Update->getOperand(2).getImm();
2334   assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2335          "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
// A SUBXri is a negative increment on the base register.
2336   if (Update->getOpcode() == AArch64::SUBXri)
2337     Value = -Value;
2338 
2339   unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2341   MachineInstrBuilder MIB;
2342   int Scale, MinOffset, MaxOffset;
2343   getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2345     // Non-paired instruction.
2346     MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2347                   TII->get(NewOpc))
2348               .add(Update->getOperand(0))
2349               .add(getLdStRegOp(*I))
2351               .addImm(Value / Scale)
2352               .setMemRefs(I->memoperands())
2353               .setMIFlags(I->mergeFlagsWith(*Update));
2354   } else {
2355     // Paired instruction.
2356     MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2357                   TII->get(NewOpc))
2358               .add(Update->getOperand(0))
2359               .add(getLdStRegOp(*I, 0))
2360               .add(getLdStRegOp(*I, 1))
2362               .addImm(Value / Scale)
2363               .setMemRefs(I->memoperands())
2364               .setMIFlags(I->mergeFlagsWith(*Update));
2365   }
2366 
2367   if (IsPreIdx) {
2368     ++NumPreFolded;
2369     LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2370   } else {
2371     ++NumPostFolded;
2372     LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2373   }
2374   LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2375   LLVM_DEBUG(I->print(dbgs()));
2376   LLVM_DEBUG(dbgs() << " ");
2377   LLVM_DEBUG(Update->print(dbgs()));
2378   LLVM_DEBUG(dbgs() << " with instruction:\n ");
2379   LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2380   LLVM_DEBUG(dbgs() << "\n");
2381 
2382   // Erase the old instructions for the block.
2383   I->eraseFromParent();
2384   Update->eraseFromParent();
2385 
2386   return NextI;
2387 }
2388
// NOTE(review): Doxygen extraction dropped several original lines inside this
// definition (2389 return-type line, 2391 the `Update` parameter line, 2396
// `NextI` init, 2402-2403 the `BaseReg`/`IndexReg` declarations, and 2418 the
// `.addUse(IndexReg)`-style operand of the memory instruction). Recover from
// upstream before editing.
//
// Rewrites a movz+movk-materialized large offset into
//   ADDXri IndexReg, BaseReg, High (LSL #12)  +  Ld/St [IndexReg, Low/Scale]
// then erases the load/store, the movk (`Update`) and the preceding movz.
2390 AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2392                                           unsigned Offset, int Scale) {
2393   assert((Update->getOpcode() == AArch64::MOVKWi) &&
2394          "Unexpected const mov instruction to merge!");
2395   MachineBasicBlock::iterator E = I->getParent()->end();
2397   MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2398   MachineInstr &MemMI = *I;
// Split Offset into a low part expressible as a scaled 12-bit immediate and a
// high part that goes into the ADD's shifted immediate.
2399   unsigned Mask = (1 << 12) * Scale - 1;
2400   unsigned Low = Offset & Mask;
2401   unsigned High = Offset - Low;
2404   MachineInstrBuilder AddMIB, MemMIB;
2405 
2406   // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2407   AddMIB =
2408       BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2409           .addDef(IndexReg)
2410           .addUse(BaseReg)
2411           .addImm(High >> 12) // shifted value
2412           .addImm(12); // shift 12
2413   (void)AddMIB;
2414   // Ld/St DestReg, IndexReg, Imm12
2415   unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2416   MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2417                .add(getLdStRegOp(MemMI))
2419                .addImm(Low / Scale)
2420                .setMemRefs(I->memoperands())
2421                .setMIFlags(I->mergeFlagsWith(*Update));
2422   (void)MemMIB;
2423 
2424   ++NumConstOffsetFolded;
2425   LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2426   LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2427   LLVM_DEBUG(PrevI->print(dbgs()));
2428   LLVM_DEBUG(dbgs() << " ");
2429   LLVM_DEBUG(Update->print(dbgs()));
2430   LLVM_DEBUG(dbgs() << " ");
2431   LLVM_DEBUG(I->print(dbgs()));
2432   LLVM_DEBUG(dbgs() << " with instruction:\n ");
2433   LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2434   LLVM_DEBUG(dbgs() << " ");
2435   LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2436   LLVM_DEBUG(dbgs() << "\n");
2437 
2438   // Erase the old instructions for the block.
2439   I->eraseFromParent();
2440   PrevI->eraseFromParent();
2441   Update->eraseFromParent();
2442 
2443   return NextI;
2444 }
2445
2446bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2447 MachineInstr &MI,
2448 unsigned BaseReg, int Offset) {
2449 switch (MI.getOpcode()) {
2450 default:
2451 break;
2452 case AArch64::SUBXri:
2453 case AArch64::ADDXri:
2454 // Make sure it's a vanilla immediate operand, not a relocation or
2455 // anything else we can't handle.
2456 if (!MI.getOperand(2).isImm())
2457 break;
2458 // Watch out for 1 << 12 shifted value.
2459 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2460 break;
2461
2462 // The update instruction source and destination register must be the
2463 // same as the load/store base register.
2464 if (MI.getOperand(0).getReg() != BaseReg ||
2465 MI.getOperand(1).getReg() != BaseReg)
2466 break;
2467
2468 int UpdateOffset = MI.getOperand(2).getImm();
2469 if (MI.getOpcode() == AArch64::SUBXri)
2470 UpdateOffset = -UpdateOffset;
2471
2472 // The immediate must be a multiple of the scaling factor of the pre/post
2473 // indexed instruction.
2474 int Scale, MinOffset, MaxOffset;
2475 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2476 if (UpdateOffset % Scale != 0)
2477 break;
2478
2479 // Scaled offset must fit in the instruction immediate.
2480 int ScaledOffset = UpdateOffset / Scale;
2481 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2482 break;
2483
2484 // If we have a non-zero Offset, we check that it matches the amount
2485 // we're adding to the register.
2486 if (!Offset || Offset == UpdateOffset)
2487 return true;
2488 break;
2489 }
2490 return false;
2491}
2492
// NOTE(review): Doxygen extraction dropped original line 2504 here (the
// iterator initialization, presumably `MachineBasicBlock::iterator MBBI =
// MI;` since MBBI is compared/decremented below) -- confirm against upstream
// before editing.
//
// Returns true (and sets \p Offset) if \p MI is the MOVKWi of a movz+movk
// pair that materializes a large constant offset feeding the index register
// of \p MemMI.
2493 bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2494                                                  MachineInstr &MI,
2495                                                  unsigned IndexReg,
2496                                                  unsigned &Offset) {
2497   // The update instruction source and destination register must be the
2498   // same as the load/store index register.
2499   if (MI.getOpcode() == AArch64::MOVKWi &&
2500       TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2501 
2502     // movz + movk hold a large offset of a Ld/St instruction.
2503     MachineBasicBlock::iterator B = MI.getParent()->begin();
2505     // Skip the scene when the MI is the first instruction of a block.
2506     if (MBBI == B)
2507       return false;
2508     MBBI = prev_nodbg(MBBI, B);
2509     MachineInstr &MovzMI = *MBBI;
2510     // Make sure the MOVKWi and MOVZWi set the same register.
2511     if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2512         MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2513       unsigned Low = MovzMI.getOperand(1).getImm();
2514       unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2515       Offset = High + Low;
2516       // 12-bit optionally shifted immediates are legal for adds.
2517       return Offset >> 24 == 0;
2518     }
2519   }
2520   return false;
2521 }
2522
// NOTE(review): Doxygen extraction dropped original lines 2527 and 2529 here
// (presumably the `MBBI` iterator initialization and the `BaseReg` declaration
// read throughout the body) -- recover them from upstream before editing.
//
// Scans forward from the memory instruction at \p I for an add/sub of
// \p UnscaledOffset on its base register that can be folded as a post-index
// update; may continue the scan into a unique live-in successor block when
// liveness is tracked. Returns the update instruction or the block-end
// iterator.
2523 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2524     MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2525   MachineBasicBlock::iterator E = I->getParent()->end();
2526   MachineInstr &MemMI = *I;
2528 
2530   int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2531                          TII->getMemScale(MemMI);
2532 
2533   // Scan forward looking for post-index opportunities. Updating instructions
2534   // can't be formed if the memory instruction doesn't have the offset we're
2535   // looking for.
2536   if (MIUnscaledOffset != UnscaledOffset)
2537     return E;
2538 
2539   // If the base register overlaps a source/destination register, we can't
2540   // merge the update. This does not apply to tag store instructions which
2541   // ignore the address part of the source register.
2542   // This does not apply to STGPi as well, which does not have unpredictable
2543   // behavior in this case unlike normal stores, and always performs writeback
2544   // after reading the source register value.
2545   if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2546     bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2547     for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2548       Register DestReg = getLdStRegOp(MemMI, i).getReg();
2549       if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2550         return E;
2551     }
2552   }
2553 
2554   // Track which register units have been modified and used between the first
2555   // insn (inclusive) and the second insn.
2556   ModifiedRegUnits.clear();
2557   UsedRegUnits.clear();
2558   MBBI = next_nodbg(MBBI, E);
2559 
2560   // We can't post-increment the stack pointer if any instruction between
2561   // the memory access (I) and the increment (MBBI) can access the memory
2562   // region defined by [SP, MBBI].
2563   const bool BaseRegSP = BaseReg == AArch64::SP;
2564   if (BaseRegSP && needsWinCFI(I->getMF())) {
2565     // FIXME: For now, we always block the optimization over SP in windows
2566     // targets as it requires to adjust the unwind/debug info, messing up
2567     // the unwind info can actually cause a miscompile.
2568     return E;
2569   }
2570 
2571   unsigned Count = 0;
2572   MachineBasicBlock *CurMBB = I->getParent();
2573   // choice of next block to visit is liveins-based
2574   bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2575 
2576   while (true) {
2577     for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2578          MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2579       MachineInstr &MI = *MBBI;
2580 
2581       // Don't count transient instructions towards the search limit since there
2582       // may be different numbers of them if e.g. debug information is present.
2583       if (!MI.isTransient())
2584         ++Count;
2585 
2586       // If we found a match, return it.
2587       if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2588         return MBBI;
2589 
2590       // Update the status of what the instruction clobbered and used.
2591       LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2592                                         TRI);
2593 
2594       // Otherwise, if the base register is used or modified, we have no match,
2595       // so return early. If we are optimizing SP, do not allow instructions
2596       // that may load or store in between the load and the optimized value
2597       // update.
2598       if (!ModifiedRegUnits.available(BaseReg) ||
2599           !UsedRegUnits.available(BaseReg) ||
2600           (BaseRegSP && MBBI->mayLoadOrStore()))
2601         return E;
2602     }
2603 
2604     if (!VisitSucc || Limit <= Count)
2605       break;
2606 
2607     // Try to go downward to successors along a CF path w/o side enters
2608     // such that BaseReg is alive along it but not at its exits
2609     MachineBasicBlock *SuccToVisit = nullptr;
2610     unsigned LiveSuccCount = 0;
// Require a unique successor where (an alias of) BaseReg is live-in; more
// than one live-in successor means the update cannot be hoisted safely.
2611     for (MachineBasicBlock *Succ : CurMBB->successors()) {
2612       for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2613         if (Succ->isLiveIn(*AI)) {
2614           if (LiveSuccCount++)
2615             return E;
2616           if (Succ->pred_size() == 1)
2617             SuccToVisit = Succ;
2618           break;
2619         }
2620       }
2621     }
2622     if (!SuccToVisit)
2623       break;
2624     CurMBB = SuccToVisit;
2625     MBBI = CurMBB->begin();
2626   }
2627 
2628   return E;
2629 }
2630
// NOTE(review): Doxygen extraction dropped original lines 2636 and 2639-2640
// here (presumably the `MBBI` iterator initialization and the
// `BaseReg`/`Offset` declarations read throughout the body) -- recover them
// from upstream before editing.
//
// Scans backward from the memory instruction at \p I for a base-register
// add/sub that can be folded as a pre-index update. Sets \p MergeEither to
// true when the merged instruction may be placed at either the update's or
// the memory instruction's position.
2631 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2632     MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2633   MachineBasicBlock::iterator B = I->getParent()->begin();
2634   MachineBasicBlock::iterator E = I->getParent()->end();
2635   MachineInstr &MemMI = *I;
2637   MachineFunction &MF = *MemMI.getMF();
2638 
2641 
2642   bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2643   Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2644                         IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2645                                      : AArch64::NoRegister};
2646 
2647   // If the load/store is the first instruction in the block, there's obviously
2648   // not any matching update. Ditto if the memory offset isn't zero.
2649   if (MBBI == B || Offset != 0)
2650     return E;
2651   // If the base register overlaps a destination register, we can't
2652   // merge the update.
2653   if (!isTagStore(MemMI)) {
2654     for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2655       if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2656         return E;
2657   }
2658 
2659   const bool BaseRegSP = BaseReg == AArch64::SP;
2660   if (BaseRegSP && needsWinCFI(I->getMF())) {
2661     // FIXME: For now, we always block the optimization over SP in windows
2662     // targets as it requires to adjust the unwind/debug info, messing up
2663     // the unwind info can actually cause a miscompile.
2664     return E;
2665   }
2666 
2667   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2668   unsigned RedZoneSize =
2669       Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2670 
2671   // Track which register units have been modified and used between the first
2672   // insn (inclusive) and the second insn.
2673   ModifiedRegUnits.clear();
2674   UsedRegUnits.clear();
2675   unsigned Count = 0;
2676   bool MemAccessBeforeSPPreInc = false;
2677   MergeEither = true;
2678   do {
2679     MBBI = prev_nodbg(MBBI, B);
2680     MachineInstr &MI = *MBBI;
2681 
2682     // Don't count transient instructions towards the search limit since there
2683     // may be different numbers of them if e.g. debug information is present.
2684     if (!MI.isTransient())
2685       ++Count;
2686 
2687     // If we found a match, return it.
2688     if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2689       // Check that the update value is within our red zone limit (which may be
2690       // zero).
2691       if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2692         return E;
2693       return MBBI;
2694     }
2695 
2696     // Update the status of what the instruction clobbered and used.
2697     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2698 
2699     // Otherwise, if the base register is used or modified, we have no match, so
2700     // return early.
2701     if (!ModifiedRegUnits.available(BaseReg) ||
2702         !UsedRegUnits.available(BaseReg))
2703       return E;
2704 
2705     // If we have a destination register (i.e. a load instruction) and a
2706     // destination register is used or modified, then we can only merge forward,
2707     // i.e. the combined instruction is put in the place of the memory
2708     // instruction. Same applies if we see a memory access or side effects.
2709     if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2710         (DestReg[0] != AArch64::NoRegister &&
2711          !(ModifiedRegUnits.available(DestReg[0]) &&
2712            UsedRegUnits.available(DestReg[0]))) ||
2713         (DestReg[1] != AArch64::NoRegister &&
2714          !(ModifiedRegUnits.available(DestReg[1]) &&
2715            UsedRegUnits.available(DestReg[1]))))
2716       MergeEither = false;
2717 
2718     // Keep track if we have a memory access before an SP pre-increment, in this
2719     // case we need to validate later that the update amount respects the red
2720     // zone.
2721     if (BaseRegSP && MBBI->mayLoadOrStore())
2722       MemAccessBeforeSPPreInc = true;
2723   } while (MBBI != B && Count < Limit);
2724   return E;
2725 }
2726
2728AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2729 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2730 MachineBasicBlock::iterator B = I->getParent()->begin();
2731 MachineBasicBlock::iterator E = I->getParent()->end();
2732 MachineInstr &MemMI = *I;
2734
2735 // If the load is the first instruction in the block, there's obviously
2736 // not any matching load or store.
2737 if (MBBI == B)
2738 return E;
2739
2740 // Make sure the IndexReg is killed and the shift amount is zero.
2741 // TODO: Relex this restriction to extend, simplify processing now.
2742 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2743 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2745 return E;
2746
2748
2749 // Track which register units have been modified and used between the first
2750 // insn (inclusive) and the second insn.
2751 ModifiedRegUnits.clear();
2752 UsedRegUnits.clear();
2753 unsigned Count = 0;
2754 do {
2755 MBBI = prev_nodbg(MBBI, B);
2756 MachineInstr &MI = *MBBI;
2757
2758 // Don't count transient instructions towards the search limit since there
2759 // may be different numbers of them if e.g. debug information is present.
2760 if (!MI.isTransient())
2761 ++Count;
2762
2763 // If we found a match, return it.
2764 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2765 return MBBI;
2766 }
2767
2768 // Update the status of what the instruction clobbered and used.
2769 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2770
2771 // Otherwise, if the index register is used or modified, we have no match,
2772 // so return early.
2773 if (!ModifiedRegUnits.available(IndexReg) ||
2774 !UsedRegUnits.available(IndexReg))
2775 return E;
2776
2777 } while (MBBI != B && Count < Limit);
2778 return E;
2779}
2780
2781bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2783 MachineInstr &MI = *MBBI;
2784 // If this is a volatile load, don't mess with it.
2785 if (MI.hasOrderedMemoryRef())
2786 return false;
2787
2788 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2789 return false;
2790
2791 // Make sure this is a reg+imm.
2792 // FIXME: It is possible to extend it to handle reg+reg cases.
2794 return false;
2795
2796 // Look backward up to LdStLimit instructions.
2798 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2799 ++NumLoadsFromStoresPromoted;
2800 // Promote the load. Keeping the iterator straight is a
2801 // pain, so we let the merge routine tell us what the next instruction
2802 // is after it's done mucking about.
2803 MBBI = promoteLoadFromStore(MBBI, StoreI);
2804 return true;
2805 }
2806 return false;
2807}
2808
2809// Merge adjacent zero stores into a wider store.
2810bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2812 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2813 MachineInstr &MI = *MBBI;
2814 MachineBasicBlock::iterator E = MI.getParent()->end();
2815
2816 if (!TII->isCandidateToMergeOrPair(MI))
2817 return false;
2818
2819 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2820 LdStPairFlags Flags;
2822 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2823 if (MergeMI != E) {
2824 ++NumZeroStoresPromoted;
2825
2826 // Keeping the iterator straight is a pain, so we let the merge routine tell
2827 // us what the next instruction is after it's done mucking about.
2828 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2829 return true;
2830 }
2831 return false;
2832}
2833
2834// Find loads and stores that can be merged into a single load or store pair
2835// instruction.
2836bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2837 MachineInstr &MI = *MBBI;
2838 MachineBasicBlock::iterator E = MI.getParent()->end();
2839
2840 if (!TII->isCandidateToMergeOrPair(MI))
2841 return false;
2842
2843 // If disable-ldp feature is opted, do not emit ldp.
2844 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2845 return false;
2846
2847 // If disable-stp feature is opted, do not emit stp.
2848 if (MI.mayStore() && Subtarget->hasDisableStp())
2849 return false;
2850
2851 // Early exit if the offset is not possible to match. (6 bits of positive
2852 // range, plus allow an extra one in case we find a later insn that matches
2853 // with Offset-1)
2854 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2856 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2857 // Allow one more for offset.
2858 if (Offset > 0)
2859 Offset -= OffsetStride;
2860 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2861 return false;
2862
2863 // Look ahead up to LdStLimit instructions for a pairable instruction.
2864 LdStPairFlags Flags;
2866 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2867 if (Paired != E) {
2868 // Keeping the iterator straight is a pain, so we let the merge routine tell
2869 // us what the next instruction is after it's done mucking about.
2870 auto Prev = std::prev(MBBI);
2871
2872 // Fetch the memoperand of the load/store that is a candidate for
2873 // combination.
2874 MachineMemOperand *MemOp =
2875 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2876
2877 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2878 // that the alignment of the source pointer is at least double the alignment
2879 // of the type.
2880 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2881 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2882 // If there is no size/align information, cancel the transformation.
2883 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2884 NumFailedAlignmentCheck++;
2885 return false;
2886 }
2887
2888 // Get the needed alignments to check them if
2889 // ldp-aligned-only/stp-aligned-only features are opted.
2890 uint64_t MemAlignment = MemOp->getAlign().value();
2891 uint64_t TypeAlignment =
2892 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2893
2894 if (MemAlignment < 2 * TypeAlignment) {
2895 NumFailedAlignmentCheck++;
2896 return false;
2897 }
2898 }
2899
2900 ++NumPairCreated;
2901 if (TII->hasUnscaledLdStOffset(MI))
2902 ++NumUnscaledPairCreated;
2903
2904 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2905 // Collect liveness info for instructions between Prev and the new position
2906 // MBBI.
2907 for (auto I = std::next(Prev); I != MBBI; I++)
2908 updateDefinedRegisters(*I, DefinedInBB, TRI);
2909
2910 return true;
2911 }
2912 return false;
2913}
2914
2915bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2917 MachineInstr &MI = *MBBI;
2918 MachineBasicBlock::iterator E = MI.getParent()->end();
2920
2921 // Look forward to try to form a post-index instruction. For example,
2922 // ldr x0, [x20]
2923 // add x20, x20, #32
2924 // merged into:
2925 // ldr x0, [x20], #32
2926 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2927 if (Update != E) {
2928 // Merge the update into the ld/st.
2929 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2930 /*IsPreIdx=*/false,
2931 /*MergeEither=*/false)) {
2932 MBBI = *NextI;
2933 return true;
2934 }
2935 }
2936
2937 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2938 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2939 return false;
2940
2941 // Look back to try to find a pre-index instruction. For example,
2942 // add x0, x0, #8
2943 // ldr x1, [x0]
2944 // merged into:
2945 // ldr x1, [x0, #8]!
2946 bool MergeEither;
2947 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2948 if (Update != E) {
2949 // Merge the update into the ld/st.
2950 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2951 /*IsPreIdx=*/true, MergeEither)) {
2952 MBBI = *NextI;
2953 return true;
2954 }
2955 }
2956
2957 // The immediate in the load/store is scaled by the size of the memory
2958 // operation. The immediate in the add we're looking for,
2959 // however, is not, so adjust here.
2960 int UnscaledOffset =
2962
2963 // Look forward to try to find a pre-index instruction. For example,
2964 // ldr x1, [x0, #64]
2965 // add x0, x0, #64
2966 // merged into:
2967 // ldr x1, [x0, #64]!
2968 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2969 if (Update != E) {
2970 // Merge the update into the ld/st.
2971 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2972 /*IsPreIdx=*/true,
2973 /*MergeEither=*/false)) {
2974 MBBI = *NextI;
2975 return true;
2976 }
2977 }
2978
2979 return false;
2980}
2981
2982bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2983 int Scale) {
2984 MachineInstr &MI = *MBBI;
2985 MachineBasicBlock::iterator E = MI.getParent()->end();
2987
2988 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2989 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2990 return false;
2991
2992 // Look back to try to find a const offset for index LdSt instruction. For
2993 // example,
2994 // mov x8, #LargeImm ; = a * (1<<12) + imm12
2995 // ldr x1, [x0, x8]
2996 // merged into:
2997 // add x8, x0, a * (1<<12)
2998 // ldr x1, [x8, imm12]
2999 unsigned Offset;
3000 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
3001 if (Update != E && (Offset & (Scale - 1)) == 0) {
3002 // Merge the imm12 into the ld/st.
3003 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
3004 return true;
3005 }
3006
3007 return false;
3008}
3009
3010bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
3011 bool EnableNarrowZeroStOpt) {
3012 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
3013
3014 bool Modified = false;
3015 // Four transformations to do here:
3016 // 1) Find loads that directly read from stores and promote them by
3017 // replacing with mov instructions. If the store is wider than the load,
3018 // the load will be replaced with a bitfield extract.
3019 // e.g.,
3020 // str w1, [x0, #4]
3021 // ldrh w2, [x0, #6]
3022 // ; becomes
3023 // str w1, [x0, #4]
3024 // lsr w2, w1, #16
3026 MBBI != E;) {
3027 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3028 Modified = true;
3029 else
3030 ++MBBI;
3031 }
3032 // 2) Merge adjacent zero stores into a wider store.
3033 // e.g.,
3034 // strh wzr, [x0]
3035 // strh wzr, [x0, #2]
3036 // ; becomes
3037 // str wzr, [x0]
3038 // e.g.,
3039 // str wzr, [x0]
3040 // str wzr, [x0, #4]
3041 // ; becomes
3042 // str xzr, [x0]
3043 if (EnableNarrowZeroStOpt)
3045 MBBI != E;) {
3046 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3047 Modified = true;
3048 else
3049 ++MBBI;
3050 }
3051 // 3) Find loads and stores that can be merged into a single load or store
3052 // pair instruction.
3053 // When compiling for SVE 128, also try to combine SVE fill/spill
3054 // instructions into LDP/STP.
3055 // e.g.,
3056 // ldr x0, [x2]
3057 // ldr x1, [x2, #8]
3058 // ; becomes
3059 // ldp x0, x1, [x2]
3060 // e.g.,
3061 // ldr z0, [x2]
3062 // ldr z1, [x2, #1, mul vl]
3063 // ; becomes
3064 // ldp q0, q1, [x2]
3065
3067 DefinedInBB.clear();
3068 DefinedInBB.addLiveIns(MBB);
3069 }
3070
3072 MBBI != E;) {
3073 // Track currently live registers up to this point, to help with
3074 // searching for a rename register on demand.
3075 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3076 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3077 Modified = true;
3078 else
3079 ++MBBI;
3080 }
3081 // 4) Find base register updates that can be merged into the load or store
3082 // as a base-reg writeback.
3083 // e.g.,
3084 // ldr x0, [x2]
3085 // add x2, x2, #4
3086 // ; becomes
3087 // ldr x0, [x2], #4
3089 MBBI != E;) {
3090 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3091 Modified = true;
3092 else
3093 ++MBBI;
3094 }
3095
3096 // 5) Find a register assigned with a const value that can be combined with
3097 // into the load or store. e.g.,
3098 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3099 // ldr x1, [x0, x8]
3100 // ; becomes
3101 // add x8, x0, a * (1<<12)
3102 // ldr x1, [x8, imm12]
3104 MBBI != E;) {
3105 int Scale;
3106 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3107 Modified = true;
3108 else
3109 ++MBBI;
3110 }
3111
3112 return Modified;
3113}
3114
3115bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3116 if (skipFunction(Fn.getFunction()))
3117 return false;
3118
3119 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3120 TII = Subtarget->getInstrInfo();
3121 TRI = Subtarget->getRegisterInfo();
3122 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3123
3124 // Resize the modified and used register unit trackers. We do this once
3125 // per function and then clear the register units each time we optimize a load
3126 // or store.
3127 ModifiedRegUnits.init(*TRI);
3128 UsedRegUnits.init(*TRI);
3129 DefinedInBB.init(*TRI);
3130
3131 bool Modified = false;
3132 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3133 for (auto &MBB : Fn) {
3134 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3135 Modified |= M;
3136 }
3137
3138 return Modified;
3139}
3140
3141// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3142// stores near one another? Note: The pre-RA instruction scheduler already has
3143// hooks to try and schedule pairable loads/stores together to improve pairing
3144// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3145
3146// FIXME: When pairing store instructions it's very possible for this pass to
3147// hoist a store with a KILL marker above another use (without a KILL marker).
3148// The resulting IR is invalid, but nothing uses the KILL markers after this
3149// pass, so it's never caused a problem in practice.
3150
3151/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3152/// load / store optimization pass.
3154 return new AArch64LoadStoreOpt();
3155}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
Definition MCAsmInfo.h:655
OpType getOperation() const
Definition MCDwarf.h:714
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.