LLVM 23.0.0git
AArch64LoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions, one of which
15// has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad-hoc fix
16// to unwind information.
17//
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
24#include "llvm/ADT/SetVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringRef.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/MC/MCAsmInfo.h"
40#include "llvm/MC/MCDwarf.h"
41#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <cstdint>
48#include <functional>
49#include <iterator>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "aarch64-ldst-opt"
56
// Pass-wide statistics, reported with -stats.
57STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded, "Number of post-index updates folded");
59STATISTIC(NumPreFolded, "Number of pre-index updates folded");
60STATISTIC(NumUnscaledPairCreated,
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
65 "not passed the alignment check");
66STATISTIC(NumConstOffsetFolded,
67 "Number of const offset of index address folded");
68
// Debug counter for bisecting which pairs are allowed to use register
// renaming (see EnableRenaming below).
69DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
70 "Controls which pairs are considered for renaming")
71
72// The LdStLimit limits how far we search for load/store pairs.
73static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
74 cl::init(20), cl::Hidden);
75
76// The UpdateLimit limits how far we search for update instructions when we form
77// pre-/post-index instructions.
// NOTE(review): this declaration appears truncated in this listing — the
// trailing `cl::Hidden);` line is elided.
78static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
80
81// The LdStConstLimit limits how far we search for const offset instructions
82// when we form index address load/store instructions.
83static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
84 cl::init(10), cl::Hidden);
85
86// Enable register renaming to find additional store pairing opportunities.
87static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
88 cl::init(true), cl::Hidden);
89
90#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
91
92namespace {
93
// Aggregates the decisions made while searching for a pairable load/store:
// the merge direction, which result (if any) must be sign-extended, and an
// optional physical register to rename to so the merge becomes legal.
94using LdStPairFlags = struct LdStPairFlags {
 95 // If a matching instruction is found, MergeForward is set to true if the
 96 // merge is to remove the first instruction and replace the second with
 97 // a pair-wise insn, and false if the reverse is true.
 98 bool MergeForward = false;
 99
 100 // SExtIdx gives the index of the result of the load pair that must be
 101 // extended. The value of SExtIdx assumes that the paired load produces the
 102 // value in this order: (I, returned iterator), i.e., -1 means no value has
 103 // to be extended, 0 means I, and 1 means the returned iterator.
 104 int SExtIdx = -1;
 105
 106 // If not none, RenameReg can be used to rename the result register of the
 107 // first store in a pair. Currently this only works when merging stores
 108 // forward.
 109 std::optional<MCPhysReg> RenameReg;
 110
 111 LdStPairFlags() = default;
 112
 113 void setMergeForward(bool V = true) { MergeForward = V; }
 114 bool getMergeForward() const { return MergeForward; }
 115
 116 void setSExtIdx(int V) { SExtIdx = V; }
 117 int getSExtIdx() const { return SExtIdx; }
 118
 119 void setRenameReg(MCPhysReg R) { RenameReg = R; }
 120 void clearRenameReg() { RenameReg = std::nullopt; }
 121 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
 122};
123
// The load/store optimizer proper. Runs over each basic block after register
// allocation, merging adjacent memory operations into pairs, promoting loads
// that forward from earlier stores, and folding base-register updates into
// pre-/post-indexed addressing forms.
// NOTE(review): several method return-type lines (MachineBasicBlock::iterator
// and similar) are elided from this listing; the declarations below are
// otherwise verbatim.
124struct AArch64LoadStoreOpt {
 126 const AArch64InstrInfo *TII;
 127 const TargetRegisterInfo *TRI;
 128 const AArch64Subtarget *Subtarget;
 129
 130 // Track which register units have been modified and used.
 131 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
 132 LiveRegUnits DefinedInBB;
 133
 134 // Scan the instructions looking for a load/store that can be combined
 135 // with the current instruction into a load/store pair.
 136 // Return the matching instruction if one is found, else MBB->end().
 138 LdStPairFlags &Flags,
 139 unsigned Limit,
 140 bool FindNarrowMerge);
 141
 142 // Scan the instructions looking for a store that writes to the address from
 143 // which the current load instruction reads. Return true if one is found.
 144 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
 146
 147 // Merge the two instructions indicated into a wider narrow store instruction.
 149 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
 151 const LdStPairFlags &Flags);
 152
 153 // Merge the two instructions indicated into a single pair-wise instruction.
 155 mergePairedInsns(MachineBasicBlock::iterator I,
 157 const LdStPairFlags &Flags);
 158
 159 // Promote the load that reads directly from the address stored to.
 161 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
 163
 164 // Scan the instruction list to find a base register update that can
 165 // be combined with the current instruction (a load or store) using
 166 // pre or post indexed addressing with writeback. Scan forwards.
 168 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
 169 int UnscaledOffset, unsigned Limit);
 170
 171 // Scan the instruction list to find a register assigned with a const
 172 // value that can be combined with the current instruction (a load or store)
 173 // using base addressing with writeback. Scan backwards.
 175 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
 176 unsigned &Offset);
 177
 178 // Scan the instruction list to find a base register update that can
 179 // be combined with the current instruction (a load or store) using
 180 // pre or post indexed addressing with writeback. Scan backwards.
 181 // `MergeEither` is set to true if the combined instruction may be placed
 182 // either at the location of the load/store instruction or at the location of
 183 // the update instruction.
 185 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
 186 bool &MergeEither);
 187
 188 // Find an instruction that updates the base register of the ld/st
 189 // instruction.
 190 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
 191 unsigned BaseReg, int Offset);
 192
 193 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
 194 unsigned IndexReg, unsigned &Offset);
 195
 196 // Merge a pre- or post-index base register update into a ld/st instruction.
 197 std::optional<MachineBasicBlock::iterator>
 198 mergeUpdateInsn(MachineBasicBlock::iterator I,
 199 MachineBasicBlock::iterator Update, bool IsForward,
 200 bool IsPreIdx, bool MergeEither);
 201
 203 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
 204 MachineBasicBlock::iterator Update, unsigned Offset,
 205 int Scale);
 206
 207 // Find and merge zero store instructions.
 208 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
 209
 210 // Find and pair ldr/str instructions.
 211 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
 212
 213 // Find and promote load instructions which read directly from store.
 214 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 215
 216 // Find and merge a base register updates before or after a ld/st instruction.
 217 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
 218
 219 // Find and merge an index ldr/st instruction into a base ld/st instruction.
 220 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
 221
 // Driver for one basic block: applies the try* transformations above.
 222 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 223
 224 bool runOnMachineFunction(MachineFunction &MF);
 225};
226
// Legacy-PassManager wrapper around the optimizer. Requires the function to
// be out of SSA form (no virtual registers), i.e. post register allocation.
227struct AArch64LoadStoreOptLegacy : public MachineFunctionPass {
 228 static char ID;
 229
 230 AArch64LoadStoreOptLegacy() : MachineFunctionPass(ID) {}
 231
 232 bool runOnMachineFunction(MachineFunction &Fn) override;
 233
 // NOTE(review): the interior of getAnalysisUsage is elided in this listing
 // (presumably the usual analysis registrations plus a call to
 // MachineFunctionPass::getAnalysisUsage(AU) — confirm against upstream).
 234 void getAnalysisUsage(AnalysisUsage &AU) const override {
 237 }
 238
 239 MachineFunctionProperties getRequiredProperties() const override {
 240 return MachineFunctionProperties().setNoVRegs();
 241 }
 242
 243 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
 244};
245
246char AArch64LoadStoreOptLegacy::ID = 0;
247
248} // end anonymous namespace
249
250INITIALIZE_PASS(AArch64LoadStoreOptLegacy, "aarch64-ldst-opt",
251 AARCH64_LOAD_STORE_OPT_NAME, false, false)
252
253static bool isNarrowStore(unsigned Opc) {
254 switch (Opc) {
255 default:
256 return false;
257 case AArch64::STRBBui:
258 case AArch64::STURBBi:
259 case AArch64::STRHHui:
260 case AArch64::STURHHi:
261 return true;
262 }
263}
264
265// These instruction set memory tag and either keep memory contents unchanged or
266// set it to zero, ignoring the address part of the source register.
267static bool isTagStore(const MachineInstr &MI) {
268 switch (MI.getOpcode()) {
269 default:
270 return false;
271 case AArch64::STGi:
272 case AArch64::STZGi:
273 case AArch64::ST2Gi:
274 case AArch64::STZ2Gi:
275 return true;
276 }
277}
278
// Map a (possibly sign-extending) load/store opcode to its non-sign-extending
// equivalent. Opcodes this pass understands that are already
// non-sign-extending map to themselves; the three sign-extending word loads
// (LDRSW*) map to the corresponding plain word loads. If \p IsValidLdStrOpc
// is non-null it is set to whether \p Opc is recognized at all; unrecognized
// opcodes return UINT_MAX.
279static unsigned getMatchingNonSExtOpcode(unsigned Opc,
 280 bool *IsValidLdStrOpc = nullptr) {
 281 if (IsValidLdStrOpc)
 282 *IsValidLdStrOpc = true;
 283 switch (Opc) {
 284 default:
 285 if (IsValidLdStrOpc)
 286 *IsValidLdStrOpc = false;
 287 return std::numeric_limits<unsigned>::max();
 // Non-sign-extending loads and stores: identity mapping.
 288 case AArch64::STRDui:
 289 case AArch64::STURDi:
 290 case AArch64::STRDpre:
 291 case AArch64::STRQui:
 292 case AArch64::STURQi:
 293 case AArch64::STRQpre:
 294 case AArch64::STRBBui:
 295 case AArch64::STURBBi:
 296 case AArch64::STRHHui:
 297 case AArch64::STURHHi:
 298 case AArch64::STRWui:
 299 case AArch64::STRWpre:
 300 case AArch64::STURWi:
 301 case AArch64::STRXui:
 302 case AArch64::STRXpre:
 303 case AArch64::STURXi:
 304 case AArch64::STR_ZXI:
 305 case AArch64::LDRDui:
 306 case AArch64::LDURDi:
 307 case AArch64::LDRDpre:
 308 case AArch64::LDRQui:
 309 case AArch64::LDURQi:
 310 case AArch64::LDRQpre:
 311 case AArch64::LDRWui:
 312 case AArch64::LDURWi:
 313 case AArch64::LDRWpre:
 314 case AArch64::LDRXui:
 315 case AArch64::LDURXi:
 316 case AArch64::LDRXpre:
 317 case AArch64::STRSui:
 318 case AArch64::STURSi:
 319 case AArch64::STRSpre:
 320 case AArch64::LDRSui:
 321 case AArch64::LDURSi:
 322 case AArch64::LDRSpre:
 323 case AArch64::LDR_ZXI:
 324 return Opc;
 // Sign-extending word loads map to their zero-extending counterparts.
 325 case AArch64::LDRSWui:
 326 return AArch64::LDRWui;
 327 case AArch64::LDURSWi:
 328 return AArch64::LDURWi;
 329 case AArch64::LDRSWpre:
 330 return AArch64::LDRWpre;
 331 }
 332}
333
334static unsigned getMatchingWideOpcode(unsigned Opc) {
335 switch (Opc) {
336 default:
337 llvm_unreachable("Opcode has no wide equivalent!");
338 case AArch64::STRBBui:
339 return AArch64::STRHHui;
340 case AArch64::STRHHui:
341 return AArch64::STRWui;
342 case AArch64::STURBBi:
343 return AArch64::STURHHi;
344 case AArch64::STURHHi:
345 return AArch64::STURWi;
346 case AArch64::STURWi:
347 return AArch64::STURXi;
348 case AArch64::STRWui:
349 return AArch64::STRXui;
350 }
351}
352
// Map a single load/store opcode to the corresponding paired (LDP/STP)
// opcode. Scaled and unscaled single forms map to the same scaled pair form;
// pre-indexed singles map to pre-indexed pairs; the SVE fill/spill opcodes
// (LDR_ZXI/STR_ZXI) map to Q-register pairs. Asserts on opcodes with no
// pairwise equivalent.
353static unsigned getMatchingPairOpcode(unsigned Opc) {
 354 switch (Opc) {
 355 default:
 356 llvm_unreachable("Opcode has no pairwise equivalent!");
 357 case AArch64::STRSui:
 358 case AArch64::STURSi:
 359 return AArch64::STPSi;
 360 case AArch64::STRSpre:
 361 return AArch64::STPSpre;
 362 case AArch64::STRDui:
 363 case AArch64::STURDi:
 364 return AArch64::STPDi;
 365 case AArch64::STRDpre:
 366 return AArch64::STPDpre;
 367 case AArch64::STRQui:
 368 case AArch64::STURQi:
 369 case AArch64::STR_ZXI:
 370 return AArch64::STPQi;
 371 case AArch64::STRQpre:
 372 return AArch64::STPQpre;
 373 case AArch64::STRWui:
 374 case AArch64::STURWi:
 375 return AArch64::STPWi;
 376 case AArch64::STRWpre:
 377 return AArch64::STPWpre;
 378 case AArch64::STRXui:
 379 case AArch64::STURXi:
 380 return AArch64::STPXi;
 381 case AArch64::STRXpre:
 382 return AArch64::STPXpre;
 383 case AArch64::LDRSui:
 384 case AArch64::LDURSi:
 385 return AArch64::LDPSi;
 386 case AArch64::LDRSpre:
 387 return AArch64::LDPSpre;
 388 case AArch64::LDRDui:
 389 case AArch64::LDURDi:
 390 return AArch64::LDPDi;
 391 case AArch64::LDRDpre:
 392 return AArch64::LDPDpre;
 393 case AArch64::LDRQui:
 394 case AArch64::LDURQi:
 395 case AArch64::LDR_ZXI:
 396 return AArch64::LDPQi;
 397 case AArch64::LDRQpre:
 398 return AArch64::LDPQpre;
 399 case AArch64::LDRWui:
 400 case AArch64::LDURWi:
 401 return AArch64::LDPWi;
 402 case AArch64::LDRWpre:
 403 return AArch64::LDPWpre;
 404 case AArch64::LDRXui:
 405 case AArch64::LDURXi:
 406 return AArch64::LDPXi;
 407 case AArch64::LDRXpre:
 408 return AArch64::LDPXpre;
 409 case AArch64::LDRSWui:
 410 case AArch64::LDURSWi:
 411 return AArch64::LDPSWi;
 412 case AArch64::LDRSWpre:
 413 return AArch64::LDPSWpre;
 414 }
 415}
416
// NOTE(review): the function header for this block is elided in this listing.
// From the body, it tests whether StOpc stores at least as many bits as LdOpc
// loads, with matching scaled/unscaled addressing form — presumably
// `static bool isMatchingStore(MachineInstr &LoadInst, MachineInstr
// &StoreInst)`; confirm against upstream.
 419 unsigned LdOpc = LoadInst.getOpcode();
 420 unsigned StOpc = StoreInst.getOpcode();
 421 switch (LdOpc) {
 422 default:
 423 llvm_unreachable("Unsupported load instruction!");
 // A byte load is covered by any store of byte width or wider.
 424 case AArch64::LDRBBui:
 425 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
 426 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
 427 case AArch64::LDURBBi:
 428 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
 429 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
 430 case AArch64::LDRHHui:
 431 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
 432 StOpc == AArch64::STRXui;
 433 case AArch64::LDURHHi:
 434 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
 435 StOpc == AArch64::STURXi;
 436 case AArch64::LDRWui:
 437 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
 438 case AArch64::LDURWi:
 439 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
 440 case AArch64::LDRXui:
 441 return StOpc == AArch64::STRXui;
 442 case AArch64::LDURXi:
 443 return StOpc == AArch64::STURXi;
 444 }
 445}
446
// Map a load/store opcode (including paired and memory-tagging forms) to its
// pre-indexed (writeback) equivalent. Asserts on opcodes with no pre-indexed
// form.
447static unsigned getPreIndexedOpcode(unsigned Opc) {
 448 // FIXME: We don't currently support creating pre-indexed loads/stores when
 449 // the load or store is the unscaled version. If we decide to perform such an
 450 // optimization in the future the cases for the unscaled loads/stores will
 451 // need to be added here.
 452 switch (Opc) {
 453 default:
 454 llvm_unreachable("Opcode has no pre-indexed equivalent!");
 455 case AArch64::STRBui:
 456 return AArch64::STRBpre;
 457 case AArch64::STRHui:
 458 return AArch64::STRHpre;
 459 case AArch64::STRSui:
 460 return AArch64::STRSpre;
 461 case AArch64::STRDui:
 462 return AArch64::STRDpre;
 463 case AArch64::STRQui:
 464 return AArch64::STRQpre;
 465 case AArch64::STRBBui:
 466 return AArch64::STRBBpre;
 467 case AArch64::STRHHui:
 468 return AArch64::STRHHpre;
 469 case AArch64::STRWui:
 470 return AArch64::STRWpre;
 471 case AArch64::STRXui:
 472 return AArch64::STRXpre;
 473 case AArch64::LDRBui:
 474 return AArch64::LDRBpre;
 475 case AArch64::LDRHui:
 476 return AArch64::LDRHpre;
 477 case AArch64::LDRSui:
 478 return AArch64::LDRSpre;
 479 case AArch64::LDRDui:
 480 return AArch64::LDRDpre;
 481 case AArch64::LDRQui:
 482 return AArch64::LDRQpre;
 483 case AArch64::LDRBBui:
 484 return AArch64::LDRBBpre;
 485 case AArch64::LDRHHui:
 486 return AArch64::LDRHHpre;
 487 case AArch64::LDRWui:
 488 return AArch64::LDRWpre;
 489 case AArch64::LDRXui:
 490 return AArch64::LDRXpre;
 491 case AArch64::LDRSWui:
 492 return AArch64::LDRSWpre;
 // Paired loads/stores.
 493 case AArch64::LDPSi:
 494 return AArch64::LDPSpre;
 495 case AArch64::LDPSWi:
 496 return AArch64::LDPSWpre;
 497 case AArch64::LDPDi:
 498 return AArch64::LDPDpre;
 499 case AArch64::LDPQi:
 500 return AArch64::LDPQpre;
 501 case AArch64::LDPWi:
 502 return AArch64::LDPWpre;
 503 case AArch64::LDPXi:
 504 return AArch64::LDPXpre;
 505 case AArch64::STPSi:
 506 return AArch64::STPSpre;
 507 case AArch64::STPDi:
 508 return AArch64::STPDpre;
 509 case AArch64::STPQi:
 510 return AArch64::STPQpre;
 511 case AArch64::STPWi:
 512 return AArch64::STPWpre;
 513 case AArch64::STPXi:
 514 return AArch64::STPXpre;
 // Memory-tagging stores.
 515 case AArch64::STGi:
 516 return AArch64::STGPreIndex;
 517 case AArch64::STZGi:
 518 return AArch64::STZGPreIndex;
 519 case AArch64::ST2Gi:
 520 return AArch64::ST2GPreIndex;
 521 case AArch64::STZ2Gi:
 522 return AArch64::STZ2GPreIndex;
 523 case AArch64::STGPi:
 524 return AArch64::STGPpre;
 525 }
 526}
527
// Map a register-offset (roX) load opcode to its unsigned-immediate-offset
// (ui) base-address equivalent. Used when an index register with a known
// constant value lets the register-offset form be rewritten as base+imm.
// Asserts on opcodes with no base-address form.
528static unsigned getBaseAddressOpcode(unsigned Opc) {
 529 // TODO: Add more index address stores.
 530 switch (Opc) {
 531 default:
 532 llvm_unreachable("Opcode has no base address equivalent!");
 533 case AArch64::LDRBroX:
 534 return AArch64::LDRBui;
 535 case AArch64::LDRBBroX:
 536 return AArch64::LDRBBui;
 537 case AArch64::LDRSBXroX:
 538 return AArch64::LDRSBXui;
 539 case AArch64::LDRSBWroX:
 540 return AArch64::LDRSBWui;
 541 case AArch64::LDRHroX:
 542 return AArch64::LDRHui;
 543 case AArch64::LDRHHroX:
 544 return AArch64::LDRHHui;
 545 case AArch64::LDRSHXroX:
 546 return AArch64::LDRSHXui;
 547 case AArch64::LDRSHWroX:
 548 return AArch64::LDRSHWui;
 549 case AArch64::LDRWroX:
 550 return AArch64::LDRWui;
 551 case AArch64::LDRSroX:
 552 return AArch64::LDRSui;
 553 case AArch64::LDRSWroX:
 554 return AArch64::LDRSWui;
 555 case AArch64::LDRDroX:
 556 return AArch64::LDRDui;
 557 case AArch64::LDRXroX:
 558 return AArch64::LDRXui;
 559 case AArch64::LDRQroX:
 560 return AArch64::LDRQui;
 561 }
 562}
563
// Map a load/store opcode (including unscaled, paired and memory-tagging
// forms) to its post-indexed (writeback) equivalent. Unlike the pre-indexed
// mapping above, unscaled (STUR*/LDUR*) forms are handled here too. Asserts
// on opcodes with no post-indexed form.
564static unsigned getPostIndexedOpcode(unsigned Opc) {
 565 switch (Opc) {
 566 default:
 567 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
 568 case AArch64::STRBui:
 569 return AArch64::STRBpost;
 570 case AArch64::STRHui:
 571 return AArch64::STRHpost;
 572 case AArch64::STRSui:
 573 case AArch64::STURSi:
 574 return AArch64::STRSpost;
 575 case AArch64::STRDui:
 576 case AArch64::STURDi:
 577 return AArch64::STRDpost;
 578 case AArch64::STRQui:
 579 case AArch64::STURQi:
 580 return AArch64::STRQpost;
 581 case AArch64::STRBBui:
 582 return AArch64::STRBBpost;
 583 case AArch64::STRHHui:
 584 return AArch64::STRHHpost;
 585 case AArch64::STRWui:
 586 case AArch64::STURWi:
 587 return AArch64::STRWpost;
 588 case AArch64::STRXui:
 589 case AArch64::STURXi:
 590 return AArch64::STRXpost;
 591 case AArch64::LDRBui:
 592 return AArch64::LDRBpost;
 593 case AArch64::LDRHui:
 594 return AArch64::LDRHpost;
 595 case AArch64::LDRSui:
 596 case AArch64::LDURSi:
 597 return AArch64::LDRSpost;
 598 case AArch64::LDRDui:
 599 case AArch64::LDURDi:
 600 return AArch64::LDRDpost;
 601 case AArch64::LDRQui:
 602 case AArch64::LDURQi:
 603 return AArch64::LDRQpost;
 604 case AArch64::LDRBBui:
 605 return AArch64::LDRBBpost;
 606 case AArch64::LDRHHui:
 607 return AArch64::LDRHHpost;
 608 case AArch64::LDRWui:
 609 case AArch64::LDURWi:
 610 return AArch64::LDRWpost;
 611 case AArch64::LDRXui:
 612 case AArch64::LDURXi:
 613 return AArch64::LDRXpost;
 614 case AArch64::LDRSWui:
 615 return AArch64::LDRSWpost;
 // Paired loads/stores.
 616 case AArch64::LDPSi:
 617 return AArch64::LDPSpost;
 618 case AArch64::LDPSWi:
 619 return AArch64::LDPSWpost;
 620 case AArch64::LDPDi:
 621 return AArch64::LDPDpost;
 622 case AArch64::LDPQi:
 623 return AArch64::LDPQpost;
 624 case AArch64::LDPWi:
 625 return AArch64::LDPWpost;
 626 case AArch64::LDPXi:
 627 return AArch64::LDPXpost;
 628 case AArch64::STPSi:
 629 return AArch64::STPSpost;
 630 case AArch64::STPDi:
 631 return AArch64::STPDpost;
 632 case AArch64::STPQi:
 633 return AArch64::STPQpost;
 634 case AArch64::STPWi:
 635 return AArch64::STPWpost;
 636 case AArch64::STPXi:
 637 return AArch64::STPXpost;
 // Memory-tagging stores.
 638 case AArch64::STGi:
 639 return AArch64::STGPostIndex;
 640 case AArch64::STZGi:
 641 return AArch64::STZGPostIndex;
 642 case AArch64::ST2Gi:
 643 return AArch64::ST2GPostIndex;
 644 case AArch64::STZ2Gi:
 645 return AArch64::STZ2GPostIndex;
 646 case AArch64::STGPi:
 647 return AArch64::STGPpost;
 648 }
 649}
650
// NOTE(review): the function header for this block is elided in this listing.
// From the body, it returns true iff FirstMI is a pre-indexed single-register
// load/store and MI is the matching non-writeback form (scaled or unscaled)
// of the same width — i.e. the two could form a pre-indexed pair. Confirm
// the exact signature against upstream.
 652
 653 unsigned OpcA = FirstMI.getOpcode();
 654 unsigned OpcB = MI.getOpcode();
 655
 656 switch (OpcA) {
 657 default:
 658 return false;
 659 case AArch64::STRSpre:
 660 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
 661 case AArch64::STRDpre:
 662 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
 663 case AArch64::STRQpre:
 664 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
 665 case AArch64::STRWpre:
 666 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
 667 case AArch64::STRXpre:
 668 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
 669 case AArch64::LDRSpre:
 670 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
 671 case AArch64::LDRDpre:
 672 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
 673 case AArch64::LDRQpre:
 674 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
 675 case AArch64::LDRWpre:
 676 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
 677 case AArch64::LDRXpre:
 678 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
 679 case AArch64::LDRSWpre:
 680 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
 681 }
 682}
683
684// Returns the scale and offset range of pre/post indexed variants of MI.
685static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
686 int &MinOffset, int &MaxOffset) {
687 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
688 bool IsTagStore = isTagStore(MI);
689 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
690 // as in the "unsigned offset" variant.
691 // All other pre/post indexed ldst instructions are unscaled.
692 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
693
694 if (IsPaired) {
695 MinOffset = -64;
696 MaxOffset = 63;
697 } else {
698 MinOffset = -256;
699 MaxOffset = 255;
700 }
701}
702
// NOTE(review): the first signature line is elided in this listing —
// presumably `static MachineOperand &getLdStRegOp(MachineInstr &MI,` — so
// this returns the Rt (PairedRegOp==0) or Rt2 (PairedRegOp==1) register
// operand of a ld/st. Pre-indexed forms define the updated base register
// first, so the register operand index is shifted by one.
 704 unsigned PairedRegOp = 0) {
 705 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
 706 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
 707 if (IsPreLdSt)
 708 PairedRegOp += 1;
 // Non-paired, non-pre-indexed instructions have the register at operand 0.
 709 unsigned Idx =
 710 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
 711 return MI.getOperand(Idx);
 712}
713
// NOTE(review): the header and the arms of the two conditional expressions
// below are elided in this listing. From the visible code, this computes the
// byte offsets of a matched load and store (scaling the offset when the
// instruction uses a scaled immediate — confirm the elided ternary arms
// against upstream) and returns true iff the loaded bytes lie entirely
// within the stored bytes.
 716 const AArch64InstrInfo *TII) {
 717 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
 718 int LoadSize = TII->getMemScale(LoadInst);
 719 int StoreSize = TII->getMemScale(StoreInst);
 720 int UnscaledStOffset =
 721 TII->hasUnscaledLdStOffset(StoreInst)
 724 int UnscaledLdOffset =
 725 TII->hasUnscaledLdStOffset(LoadInst)
 // Load must start at or after the store and end at or before it.
 728 return (UnscaledStOffset <= UnscaledLdOffset) &&
 729 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
 730}
731
// NOTE(review): the function header is elided in this listing — presumably
// `static bool isPromotableZeroStoreInst(MachineInstr &MI)`. Returns true
// for a 32-bit-or-narrower store whose source register is WZR, i.e. a zero
// store that can be widened/merged.
 733 unsigned Opc = MI.getOpcode();
 734 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
 735 isNarrowStore(Opc)) &&
 736 getLdStRegOp(MI).getReg() == AArch64::WZR;
 737}
738
// NOTE(review): the function header is elided in this listing — presumably
// `static bool isPromotableLoadFromStore(MachineInstr &MI)`. Classifies the
// integer loads (byte/half/word/doubleword, scaled and unscaled) that can be
// replaced by forwarding the value of an earlier covering store.
 740 switch (MI.getOpcode()) {
 741 default:
 742 return false;
 743 // Scaled instructions.
 744 case AArch64::LDRBBui:
 745 case AArch64::LDRHHui:
 746 case AArch64::LDRWui:
 747 case AArch64::LDRXui:
 748 // Unscaled instructions.
 749 case AArch64::LDURBBi:
 750 case AArch64::LDURHHi:
 751 case AArch64::LDURWi:
 752 case AArch64::LDURXi:
 753 return true;
 754 }
 755}
756
// NOTE(review): the function header is elided in this listing — presumably
// `static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo
// &AFI)` (AFI is referenced below). Returns true if MI is a ld/st whose base
// register update could be folded into a pre/post-indexed form.
 758 unsigned Opc = MI.getOpcode();
 759 switch (Opc) {
 760 default:
 761 return false;
 762 // Scaled instructions.
 763 case AArch64::STRBui:
 764 case AArch64::STRHui:
 765 case AArch64::STRSui:
 766 case AArch64::STRDui:
 767 case AArch64::STRQui:
 768 case AArch64::STRXui:
 769 case AArch64::STRWui:
 770 case AArch64::STRHHui:
 771 case AArch64::STRBBui:
 772 case AArch64::LDRBui:
 773 case AArch64::LDRHui:
 774 case AArch64::LDRSui:
 775 case AArch64::LDRDui:
 776 case AArch64::LDRQui:
 777 case AArch64::LDRXui:
 778 case AArch64::LDRWui:
 779 case AArch64::LDRHHui:
 780 case AArch64::LDRBBui:
 781 case AArch64::STGi:
 782 case AArch64::STZGi:
 783 case AArch64::ST2Gi:
 784 case AArch64::STZ2Gi:
 785 case AArch64::STGPi:
 786 // Unscaled instructions.
 787 case AArch64::STURSi:
 788 case AArch64::STURDi:
 789 case AArch64::STURQi:
 790 case AArch64::STURWi:
 791 case AArch64::STURXi:
 792 case AArch64::LDURSi:
 793 case AArch64::LDURDi:
 794 case AArch64::LDURQi:
 795 case AArch64::LDURWi:
 796 case AArch64::LDURXi:
 797 // Paired instructions.
 798 case AArch64::LDPSi:
 799 case AArch64::LDPSWi:
 800 case AArch64::LDPDi:
 801 case AArch64::LDPQi:
 802 case AArch64::LDPWi:
 803 case AArch64::LDPXi:
 804 case AArch64::STPSi:
 805 case AArch64::STPDi:
 806 case AArch64::STPQi:
 807 case AArch64::STPWi:
 808 case AArch64::STPXi:
 809 // Make sure this is a reg+imm (as opposed to an address reloc).
 // NOTE(review): the guard condition (original line 810) is elided here —
 // presumably `if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())`.
 811 return false;
 812
 813 // When using stack tagging, simple sp+imm loads and stores are not
 814 // tag-checked, but pre- and post-indexed versions of them are, so we can't
 815 // replace the former with the latter. This transformation would be valid
 816 // if the load/store accesses an untagged stack slot, but we don't have
 817 // that information available after frame indices have been eliminated.
 818 if (AFI.isMTETagged() &&
 819 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
 820 return false;
 821
 822 return true;
 823 }
 824}
825
826// Make sure this is a reg+reg Ld/St
827static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
828 unsigned Opc = MI.getOpcode();
829 switch (Opc) {
830 default:
831 return false;
832 // Scaled instructions.
833 // TODO: Add more index address stores.
834 case AArch64::LDRBroX:
835 case AArch64::LDRBBroX:
836 case AArch64::LDRSBXroX:
837 case AArch64::LDRSBWroX:
838 Scale = 1;
839 return true;
840 case AArch64::LDRHroX:
841 case AArch64::LDRHHroX:
842 case AArch64::LDRSHXroX:
843 case AArch64::LDRSHWroX:
844 Scale = 2;
845 return true;
846 case AArch64::LDRWroX:
847 case AArch64::LDRSroX:
848 case AArch64::LDRSWroX:
849 Scale = 4;
850 return true;
851 case AArch64::LDRDroX:
852 case AArch64::LDRXroX:
853 Scale = 8;
854 return true;
855 case AArch64::LDRQroX:
856 Scale = 16;
857 return true;
858 }
859}
860
// NOTE(review): the function header is elided in this listing; this is
// referenced later as `isRewritableImplicitDef(MOP)`, so presumably
// `static bool isRewritableImplicitDef(const MachineOperand &MO)`. Operands
// of ORRWrs/ADDWri (32-bit moves/adds whose implicit 64-bit super-register
// def is an artifact) are treated as rewritable regardless of the renamable
// flag; everything else falls back to MO.isRenamable().
 862 switch (MO.getParent()->getOpcode()) {
 863 default:
 864 return MO.isRenamable();
 865 case AArch64::ORRWrs:
 866 case AArch64::ADDWri:
 867 return true;
 868 }
 869}
870
// Merge two adjacent zero stores (I and MergeMI) into one store of twice the
// width, returning the iterator to continue scanning from.
// NOTE(review): this listing elides the return-type line, the MergeMI
// parameter line, the opening of the assert (original line 875) and the
// declaration of NextI (original line 879) — confirm against upstream.
872AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
 874 const LdStPairFlags &Flags) {
 876 "Expected promotable zero stores.");
 877
 878 MachineBasicBlock::iterator E = I->getParent()->end();
 880 // If NextI is the second of the two instructions to be merged, we need
 881 // to skip one further. Either way we merge will invalidate the iterator,
 882 // and we don't need to scan the new instruction, as it's a pairwise
 883 // instruction, which we're not considering for further action anyway.
 884 if (NextI == MergeMI)
 885 NextI = next_nodbg(NextI, E);
 886
 887 unsigned Opc = I->getOpcode();
 888 unsigned MergeMIOpc = MergeMI->getOpcode();
 889 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
 890 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
 891 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
 892 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
 893
 894 bool MergeForward = Flags.getMergeForward();
 895 // Insert our new paired instruction after whichever of the paired
 896 // instructions MergeForward indicates.
 897 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
 898 // Also based on MergeForward is from where we copy the base register operand
 899 // so we get the flags compatible with the input code.
 900 const MachineOperand &BaseRegOp =
 901 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
 902 : AArch64InstrInfo::getLdStBaseOp(*I);
 903
 904 // Which register is Rt and which is Rt2 depends on the offset order.
 905 int64_t IOffsetInBytes =
 906 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
 907 int64_t MIOffsetInBytes =
 909 MergeMIOffsetStride;
 910 // Select final offset based on the offset order.
 // The wider store is placed at the lower of the two byte offsets.
 911 int64_t OffsetImm;
 912 if (IOffsetInBytes > MIOffsetInBytes)
 913 OffsetImm = MIOffsetInBytes;
 914 else
 915 OffsetImm = IOffsetInBytes;
 916
 917 int NewOpcode = getMatchingWideOpcode(Opc);
 918 // Adjust final offset on scaled stores because the new instruction
 919 // has a different scale.
 920 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
 921 int NewOffsetStride = TII->getMemScale(NewOpcode);
 922 assert(((OffsetImm % NewOffsetStride) == 0) &&
 923 "Offset should be a multiple of the store memory scale");
 924 OffsetImm = OffsetImm / NewOffsetStride;
 925 }
 926
 927 // Construct the new instruction.
 // The stored value is the zero register of the appropriate width; memory
 // operands and instruction flags are merged from both original stores.
 928 DebugLoc DL = I->getDebugLoc();
 929 MachineBasicBlock *MBB = I->getParent();
 930 MachineInstrBuilder MIB;
 931 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
 932 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
 933 .add(BaseRegOp)
 934 .addImm(OffsetImm)
 935 .cloneMergedMemRefs({&*I, &*MergeMI})
 936 .setMIFlags(I->mergeFlagsWith(*MergeMI));
 937 (void)MIB;
 938
 939 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
 940 LLVM_DEBUG(I->print(dbgs()));
 941 LLVM_DEBUG(dbgs() << " ");
 942 LLVM_DEBUG(MergeMI->print(dbgs()));
 943 LLVM_DEBUG(dbgs() << " with instruction:\n ");
 944 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
 945 LLVM_DEBUG(dbgs() << "\n");
 946
 947 // Erase the old instructions.
 948 I->eraseFromParent();
 949 MergeMI->eraseFromParent();
 950 return NextI;
 951}
952
953// Apply Fn to all instructions between MI and the beginning of the block, until
954// a def for DefReg is reached. Returns true, iff Fn returns true for all
955// visited instructions. Stop after visiting Limit iterations.
// NOTE(review): the first signature line is elided in this listing —
// presumably `static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg
// DefReg,` — confirm against upstream.
 957 const TargetRegisterInfo *TRI, unsigned Limit,
 958 std::function<bool(MachineInstr &, bool)> &Fn) {
 959 auto MBB = MI.getParent();
 // Walk backwards from MI, skipping debug instructions.
 960 for (MachineInstr &I :
 961 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
 962 if (!Limit)
 963 return false;
 964 --Limit;
 965
 // An operand counts as a def of DefReg if it is a non-debug register def
 // overlapping DefReg (sub-/super-register overlap included).
 966 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
 967 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
 968 TRI->regsOverlap(MOP.getReg(), DefReg);
 969 });
 // The defining instruction itself is still passed to Fn before stopping.
 970 if (!Fn(I, isDef))
 971 return false;
 972 if (isDef)
 973 break;
 974 }
 975 return true;
 976}
977
// NOTE(review): the first signature line is elided in this listing —
// presumably `static void updateDefinedRegisters(MachineInstr &MI,
// LiveRegUnits &Units,` — confirm against upstream. Updates the register-unit
// set for MI: killed registers are removed first, then all remaining
// register operands are added, so a register both killed and redefined by MI
// ends up in the set.
 979 const TargetRegisterInfo *TRI) {
 980
 981 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
 982 if (MOP.isReg() && MOP.isKill())
 983 Units.removeReg(MOP.getReg());
 984
 985 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
 986 if (MOP.isReg() && !MOP.isKill())
 987 Units.addReg(MOP.getReg());
 988}
989
990/// This function will add a new entry into the debugValueSubstitutions table
991/// when two instructions have been merged into a new one represented by \p
992/// MergedInstr.
// NOTE(review): the first signature line is elided in this listing —
// presumably `static void addDebugSubstitutionsToTable(MachineFunction *MF,`.
 994 unsigned InstrNumToSet,
 995 MachineInstr &OriginalInstr,
 996 MachineInstr &MergedInstr) {
 997
 998 // Figure out the Operand Index of the destination register of the
 999 // OriginalInstr in the new MergedInstr.
 1000 auto Reg = OriginalInstr.getOperand(0).getReg();
 1001 unsigned OperandNo = 0;
 1002 bool RegFound = false;
 // NOTE(review): Op.getReg() is called without an Op.isReg() guard, so this
 // relies on every operand scanned before the match being a register
 // operand; also `const auto Op` copies each MachineOperand — consider
 // `const auto &Op`. Worth confirming/fixing upstream.
 1003 for (const auto Op : MergedInstr.operands()) {
 1004 if (Op.getReg() == Reg) {
 1005 RegFound = true;
 1006 break;
 1007 }
 1008 OperandNo++;
 1009 }
 1010
 // Map (original debug instr number, operand 0) to (new number, OperandNo).
 1011 if (RegFound)
 1012 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
 1013 {InstrNumToSet, OperandNo});
 1014}
1015
1017AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
1019 const LdStPairFlags &Flags) {
1020 MachineBasicBlock::iterator E = I->getParent()->end();
1022 // If NextI is the second of the two instructions to be merged, we need
1023 // to skip one further. Either way we merge will invalidate the iterator,
1024 // and we don't need to scan the new instruction, as it's a pairwise
1025 // instruction, which we're not considering for further action anyway.
1026 if (NextI == Paired)
1027 NextI = next_nodbg(NextI, E);
1028
1029 int SExtIdx = Flags.getSExtIdx();
1030 unsigned Opc =
1031 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1032 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1033 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1034
1035 bool MergeForward = Flags.getMergeForward();
1036
1037 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1038 if (RenameReg) {
1039 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1040 DefinedInBB.addReg(*RenameReg);
1041
1042 // Return the sub/super register for RenameReg, matching the size of
1043 // OriginalReg.
1044 auto GetMatchingSubReg =
1045 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1046 for (MCPhysReg SubOrSuper :
1047 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1048 if (C->contains(SubOrSuper))
1049 return SubOrSuper;
1050 }
1051 llvm_unreachable("Should have found matching sub or super register!");
1052 };
1053
1054 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1055 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1056 bool IsDef) {
1057 if (IsDef) {
1058 bool SeenDef = false;
1059 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1060 MachineOperand &MOP = MI.getOperand(OpIdx);
1061 // Rename the first explicit definition and all implicit
1062 // definitions matching RegToRename.
1063 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1064 (!MergeForward || !SeenDef ||
1065 (MOP.isDef() && MOP.isImplicit())) &&
1066 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1067 assert((MOP.isImplicit() ||
1068 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1069 "Need renamable operands");
1070 Register MatchingReg;
1071 if (const TargetRegisterClass *RC =
1072 MI.getRegClassConstraint(OpIdx, TII, TRI))
1073 MatchingReg = GetMatchingSubReg(RC);
1074 else {
1075 if (!isRewritableImplicitDef(MOP))
1076 continue;
1077 MatchingReg = GetMatchingSubReg(
1078 TRI->getMinimalPhysRegClass(MOP.getReg()));
1079 }
1080 MOP.setReg(MatchingReg);
1081 SeenDef = true;
1082 }
1083 }
1084 } else {
1085 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1086 MachineOperand &MOP = MI.getOperand(OpIdx);
1087 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1088 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1089 assert((MOP.isImplicit() ||
1090 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1091 "Need renamable operands");
1092 Register MatchingReg;
1093 if (const TargetRegisterClass *RC =
1094 MI.getRegClassConstraint(OpIdx, TII, TRI))
1095 MatchingReg = GetMatchingSubReg(RC);
1096 else
1097 MatchingReg = GetMatchingSubReg(
1098 TRI->getMinimalPhysRegClass(MOP.getReg()));
1099 assert(MatchingReg != AArch64::NoRegister &&
1100 "Cannot find matching regs for renaming");
1101 MOP.setReg(MatchingReg);
1102 }
1103 }
1104 }
1105 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1106 return true;
1107 };
1108 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1109 TRI, UINT32_MAX, UpdateMIs);
1110
1111#if !defined(NDEBUG)
1112 // For forward merging store:
1113 // Make sure the register used for renaming is not used between the
1114 // paired instructions. That would trash the content before the new
1115 // paired instruction.
1116 MCPhysReg RegToCheck = *RenameReg;
1117 // For backward merging load:
1118 // Make sure the register being renamed is not used between the
1119 // paired instructions. That would trash the content after the new
1120 // paired instruction.
1121 if (!MergeForward)
1122 RegToCheck = RegToRename;
1123 for (auto &MI :
1124 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1125 MergeForward ? std::next(I) : I,
1126 MergeForward ? std::next(Paired) : Paired))
1127 assert(all_of(MI.operands(),
1128 [this, RegToCheck](const MachineOperand &MOP) {
1129 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1130 MOP.isUndef() ||
1131 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1132 }) &&
1133 "Rename register used between paired instruction, trashing the "
1134 "content");
1135#endif
1136 }
1137
1138 // Insert our new paired instruction after whichever of the paired
1139 // instructions MergeForward indicates.
1140 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1141 // Also based on MergeForward is from where we copy the base register operand
1142 // so we get the flags compatible with the input code.
1143 const MachineOperand &BaseRegOp =
1144 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1145 : AArch64InstrInfo::getLdStBaseOp(*I);
1146
1148 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1149 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1150 if (IsUnscaled != PairedIsUnscaled) {
1151 // We're trying to pair instructions that differ in how they are scaled. If
1152 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1153 // the opposite (i.e., make Paired's offset unscaled).
1154 int MemSize = TII->getMemScale(*Paired);
1155 if (PairedIsUnscaled) {
1156 // If the unscaled offset isn't a multiple of the MemSize, we can't
1157 // pair the operations together.
1158 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1159 "Offset should be a multiple of the stride!");
1160 PairedOffset /= MemSize;
1161 } else {
1162 PairedOffset *= MemSize;
1163 }
1164 }
1165
1166 // Which register is Rt and which is Rt2 depends on the offset order.
1167 // However, for pre load/stores the Rt should be the one of the pre
1168 // load/store.
1169 MachineInstr *RtMI, *Rt2MI;
1170 if (Offset == PairedOffset + OffsetStride &&
1172 RtMI = &*Paired;
1173 Rt2MI = &*I;
1174 // Here we swapped the assumption made for SExtIdx.
1175 // I.e., we turn ldp I, Paired into ldp Paired, I.
1176 // Update the index accordingly.
1177 if (SExtIdx != -1)
1178 SExtIdx = (SExtIdx + 1) % 2;
1179 } else {
1180 RtMI = &*I;
1181 Rt2MI = &*Paired;
1182 }
1183 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1184 // Scale the immediate offset, if necessary.
1185 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1186 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1187 "Unscaled offset cannot be scaled.");
1188 OffsetImm /= TII->getMemScale(*RtMI);
1189 }
1190
1191 // Construct the new instruction.
1192 MachineInstrBuilder MIB;
1193 DebugLoc DL = I->getDebugLoc();
1194 MachineBasicBlock *MBB = I->getParent();
1195 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1196 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1197 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1198 // Kill flags may become invalid when moving stores for pairing.
1199 if (RegOp0.isUse()) {
1200 if (!MergeForward) {
1201 // Clear kill flags on store if moving upwards. Example:
1202 // STRWui kill %w0, ...
1203 // USE %w1
1204 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1205 // We are about to move the store of w1, so its kill flag may become
1206 // invalid; not the case for w0.
1207 // Since w1 is used between the stores, the kill flag on w1 is cleared
1208 // after merging.
1209 // STPWi kill %w0, %w1, ...
1210 // USE %w1
1211 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1212 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1213 PairedRegOp.setIsKill(false);
1214 } else {
1215 // Clear kill flags of the first stores register. Example:
1216 // STRWui %w1, ...
1217 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1218 // STRW %w0
1220 for (MachineInstr &MI :
1221 make_range(std::next(I->getIterator()), Paired->getIterator()))
1222 MI.clearRegisterKills(Reg, TRI);
1223 }
1224 }
1225
1226 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1227 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1228
1229 // Adds the pre-index operand for pre-indexed ld/st pairs.
1230 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1231 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1232
1233 MIB.add(RegOp0)
1234 .add(RegOp1)
1235 .add(BaseRegOp)
1236 .addImm(OffsetImm)
1237 .cloneMergedMemRefs({&*I, &*Paired})
1238 .setMIFlags(I->mergeFlagsWith(*Paired));
1239
1240 (void)MIB;
1241
1242 LLVM_DEBUG(
1243 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1244 LLVM_DEBUG(I->print(dbgs()));
1245 LLVM_DEBUG(dbgs() << " ");
1246 LLVM_DEBUG(Paired->print(dbgs()));
1247 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1248 if (SExtIdx != -1) {
1249 // Generate the sign extension for the proper result of the ldp.
1250 // I.e., with X1, that would be:
1251 // %w1 = KILL %w1, implicit-def %x1
1252 // %x1 = SBFMXri killed %x1, 0, 31
1253 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1254 // Right now, DstMO has the extended register, since it comes from an
1255 // extended opcode.
1256 Register DstRegX = DstMO.getReg();
1257 // Get the W variant of that register.
1258 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1259 // Update the result of LDP to use the W instead of the X variant.
1260 DstMO.setReg(DstRegW);
1261 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1262 LLVM_DEBUG(dbgs() << "\n");
1263 // Make the machine verifier happy by providing a definition for
1264 // the X register.
1265 // Insert this definition right after the generated LDP, i.e., before
1266 // InsertionPoint.
1267 MachineInstrBuilder MIBKill =
1268 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1269 .addReg(DstRegW)
1270 .addReg(DstRegX, RegState::Define);
1271 MIBKill->getOperand(2).setImplicit();
1272 // Create the sign extension.
1273 MachineInstrBuilder MIBSXTW =
1274 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1275 .addReg(DstRegX)
1276 .addImm(0)
1277 .addImm(31);
1278 (void)MIBSXTW;
1279
1280 // In the case of a sign-extend, where we have something like:
1281 // debugValueSubstitutions:[]
1282 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1283 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1284 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1285 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1286
1287 // It will be converted to:
1288 // debugValueSubstitutions:[]
1289 // $w0, $w1 = LDPWi $x0, 0
1290 // $w0 = KILL $w0, implicit-def $x0
1291 // $x0 = SBFMXri $x0, 0, 31
1292 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1293 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1294
1295 // We want the final result to look like:
1296 // debugValueSubstitutions:
1297 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1298 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1299 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1300 // $w0 = KILL $w0, implicit-def $x0
1301 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1302 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1303 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1304
1305 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1306 // instruction contains the final value we care about we give it a new
1307 // debug-instr-number 3. Whereas, $w1 contains the final value that we care
1308 // about, therefore the LDP instruction is also given a new
1309 // debug-instr-number 4. We have to add these substitutions to the
1310 // debugValueSubstitutions table. However, we also have to ensure that the
1311 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1312 // $w1 is the second operand of the LDP instruction.
1313
1314 if (I->peekDebugInstrNum()) {
1315 // If I is the instruction which got sign extended and has a
1316 // debug-instr-number, give the SBFMXri instruction a new
1317 // debug-instr-number, and update the debugValueSubstitutions table with
1318 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1319 // instruction a new debug-instr-number, and update the
1320 // debugValueSubstitutions table with the new debug-instr-number and
1321 // OpIndex pair.
1322 unsigned NewInstrNum;
1323 if (DstRegX == I->getOperand(0).getReg()) {
1324 NewInstrNum = MIBSXTW->getDebugInstrNum();
1325 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1326 *MIBSXTW);
1327 } else {
1328 NewInstrNum = MIB->getDebugInstrNum();
1329 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1330 }
1331 }
1332 if (Paired->peekDebugInstrNum()) {
1333 // If Paired is the instruction which got sign extended and has a
1334 // debug-instr-number, give the SBFMXri instruction a new
1335 // debug-instr-number, and update the debugValueSubstitutions table with
1336 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1337 // instruction a new debug-instr-number, and update the
1338 // debugValueSubstitutions table with the new debug-instr-number and
1339 // OpIndex pair.
1340 unsigned NewInstrNum;
1341 if (DstRegX == Paired->getOperand(0).getReg()) {
1342 NewInstrNum = MIBSXTW->getDebugInstrNum();
1343 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1344 *MIBSXTW);
1345 } else {
1346 NewInstrNum = MIB->getDebugInstrNum();
1347 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1348 *MIB);
1349 }
1350 }
1351
1352 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1353 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1354 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1355 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1356 // variant of the registers.
1357 MachineOperand &MOp0 = MIB->getOperand(0);
1358 MachineOperand &MOp1 = MIB->getOperand(1);
1359 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1360 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1361 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1362 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1363 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1364 } else {
1365
1366 // In the case that the merge doesn't result in a sign-extend, if we have
1367 // something like:
1368 // debugValueSubstitutions:[]
1369 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1370 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1371 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1372 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1373
1374 // It will be converted to:
1375 // debugValueSubstitutions: []
1376 // $x0, $x1 = LDPXi $x0, 0
1377 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1378 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1379
1380 // We want the final result to look like:
1381 // debugValueSubstitutions:
1382 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1383 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1384 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1385 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1386 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1387
1388 // Here all that needs to be done is, that the LDP instruction needs to be
1389 // updated with a new debug-instr-number, we then need to add entries into
1390 // the debugSubstitutions table to map the old instr-refs to the new ones.
1391
1392 // Assign new DebugInstrNum to the Paired instruction.
1393 if (I->peekDebugInstrNum()) {
1394 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1395 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1396 *MIB);
1397 }
1398 if (Paired->peekDebugInstrNum()) {
1399 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1400 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1401 *MIB);
1402 }
1403
1404 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1405 }
1406 LLVM_DEBUG(dbgs() << "\n");
1407
1408 if (MergeForward)
1409 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1410 if (MOP.isReg() && MOP.isKill())
1411 DefinedInBB.addReg(MOP.getReg());
1412
1413 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1414 // only copies implicit defs and makes sure that each operand is only added
1415 // once in case of duplicates.
1416 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1418 SmallSetVector<Register, 4> Ops;
1419 for (const MachineOperand &MO :
1420 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1421 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1422 Ops.insert(MO.getReg());
1423 for (const MachineOperand &MO :
1424 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1425 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1426 Ops.insert(MO.getReg());
1427 for (auto Op : Ops)
1428 MIB.addDef(Op, RegState::Implicit);
1429 };
1430 CopyImplicitOps(I, Paired);
1431
1432 // Erase the old instructions.
1433 I->eraseFromParent();
1434 Paired->eraseFromParent();
1435
1436 return NextI;
1437}
1438
1440AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1443 next_nodbg(LoadI, LoadI->getParent()->end());
1444
1445 int LoadSize = TII->getMemScale(*LoadI);
1446 int StoreSize = TII->getMemScale(*StoreI);
1447 Register LdRt = getLdStRegOp(*LoadI).getReg();
1448 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1449 Register StRt = getLdStRegOp(*StoreI).getReg();
1450 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1451
1452 assert((IsStoreXReg ||
1453 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1454 "Unexpected RegClass");
1455
1456 MachineInstr *BitExtMI;
1457 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1458 // Remove the load, if the destination register of the loads is the same
1459 // register for stored value.
1460 if (StRt == LdRt && LoadSize == 8) {
1461 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1462 LoadI->getIterator())) {
1463 if (MI.killsRegister(StRt, TRI)) {
1464 MI.clearRegisterKills(StRt, TRI);
1465 break;
1466 }
1467 }
1468 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1469 LLVM_DEBUG(LoadI->print(dbgs()));
1470 LLVM_DEBUG(dbgs() << "\n");
1471 LoadI->eraseFromParent();
1472 return NextI;
1473 }
1474 // Replace the load with a mov if the load and store are in the same size.
1475 BitExtMI =
1476 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1477 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1478 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1479 .add(StMO)
1481 .setMIFlags(LoadI->getFlags());
1482 } else {
1483 // FIXME: Currently we disable this transformation in big-endian targets as
1484 // performance and correctness are verified only in little-endian.
1485 if (!Subtarget->isLittleEndian())
1486 return NextI;
1487 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1488 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1489 "Unsupported ld/st match");
1490 assert(LoadSize <= StoreSize && "Invalid load size");
1491 int UnscaledLdOffset =
1492 IsUnscaled
1494 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1495 int UnscaledStOffset =
1496 IsUnscaled
1498 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1499 int Width = LoadSize * 8;
1500 Register DestReg =
1501 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1502 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1503 : LdRt;
1504
1505 assert((UnscaledLdOffset >= UnscaledStOffset &&
1506 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1507 "Invalid offset");
1508
1509 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1510 int Imms = Immr + Width - 1;
1511 if (UnscaledLdOffset == UnscaledStOffset) {
1512 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1513 | ((Immr) << 6) // immr
1514 | ((Imms) << 0) // imms
1515 ;
1516
1517 BitExtMI =
1518 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1519 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1520 DestReg)
1521 .add(StMO)
1522 .addImm(AndMaskEncoded)
1523 .setMIFlags(LoadI->getFlags());
1524 } else if (IsStoreXReg && Imms == 31) {
1525 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1526 // instruction.
1527 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1528 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(AArch64::UBFMWri),
1530 TRI->getSubReg(DestReg, AArch64::sub_32))
1531 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1532 .addImm(Immr)
1533 .addImm(Imms)
1534 .setMIFlags(LoadI->getFlags());
1535 } else {
1536 BitExtMI =
1537 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1538 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1539 DestReg)
1540 .add(StMO)
1541 .addImm(Immr)
1542 .addImm(Imms)
1543 .setMIFlags(LoadI->getFlags());
1544 }
1545 }
1546
1547 // Clear kill flags between store and load.
1548 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1549 BitExtMI->getIterator()))
1550 if (MI.killsRegister(StRt, TRI)) {
1551 MI.clearRegisterKills(StRt, TRI);
1552 break;
1553 }
1554
1555 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1556 LLVM_DEBUG(StoreI->print(dbgs()));
1557 LLVM_DEBUG(dbgs() << " ");
1558 LLVM_DEBUG(LoadI->print(dbgs()));
1559 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1560 LLVM_DEBUG(StoreI->print(dbgs()));
1561 LLVM_DEBUG(dbgs() << " ");
1562 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1563 LLVM_DEBUG(dbgs() << "\n");
1564
1565 // Erase the old instructions.
1566 LoadI->eraseFromParent();
1567 return NextI;
1568}
1569
/// Return true if \p Offset (scaled for unscaled instructions by
/// \p OffsetStride) fits in the signed 7-bit immediate of a paired
/// load/store instruction.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Unscaled instructions carry a byte offset; convert it to the "element"
  // offset used by the scaled pair load/store instructions. A byte offset
  // that is not a multiple of the stride can never match, so reject it.
  if (IsUnscaled) {
    if (Offset % OffsetStride != 0)
      return false;
    Offset /= OffsetStride;
  }
  // Paired instructions accept element offsets in [-64, 63].
  return -64 <= Offset && Offset <= 63;
}
1582
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint_64t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  // Round Num up to the next multiple of PowOf2 by adding PowOf2-1 and
  // clearing the low bits with the complement of the mask.
  const int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
1590
1591static bool mayAlias(MachineInstr &MIa,
1593 AliasAnalysis *AA) {
1594 for (MachineInstr *MIb : MemInsns) {
1595 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1596 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1597 return true;
1598 }
1599 }
1600
1601 LLVM_DEBUG(dbgs() << "No aliases found\n");
1602 return false;
1603}
1604
1605bool AArch64LoadStoreOpt::findMatchingStore(
1606 MachineBasicBlock::iterator I, unsigned Limit,
1608 MachineBasicBlock::iterator B = I->getParent()->begin();
1610 MachineInstr &LoadMI = *I;
1612
1613 // If the load is the first instruction in the block, there's obviously
1614 // not any matching store.
1615 if (MBBI == B)
1616 return false;
1617
1618 // Track which register units have been modified and used between the first
1619 // insn and the second insn.
1620 ModifiedRegUnits.clear();
1621 UsedRegUnits.clear();
1622
1623 unsigned Count = 0;
1624 do {
1625 MBBI = prev_nodbg(MBBI, B);
1626 MachineInstr &MI = *MBBI;
1627
1628 // Don't count transient instructions towards the search limit since there
1629 // may be different numbers of them if e.g. debug information is present.
1630 if (!MI.isTransient())
1631 ++Count;
1632
1633 // If the load instruction reads directly from the address to which the
1634 // store instruction writes and the stored value is not modified, we can
1635 // promote the load. Since we do not handle stores with pre-/post-index,
1636 // it's unnecessary to check if BaseReg is modified by the store itself.
1637 // Also we can't handle stores without an immediate offset operand,
1638 // while the operand might be the address for a global variable.
1639 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1642 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1643 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1644 StoreI = MBBI;
1645 return true;
1646 }
1647
1648 if (MI.isCall())
1649 return false;
1650
1651 // Update modified / uses register units.
1652 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1653
1654 // Otherwise, if the base register is modified, we have no match, so
1655 // return early.
1656 if (!ModifiedRegUnits.available(BaseReg))
1657 return false;
1658
1659 // If we encounter a store aliased with the load, return early.
1660 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1661 return false;
1662 } while (MBBI != B && Count < Limit);
1663 return false;
1664}
1665
1666static bool needsWinCFI(const MachineFunction *MF) {
1667 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1669}
1670
1671// Returns true if FirstMI and MI are candidates for merging or pairing.
1672// Otherwise, returns false.
1674 LdStPairFlags &Flags,
1675 const AArch64InstrInfo *TII) {
1676 // If this is volatile or if pairing is suppressed, not a candidate.
1677 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1678 return false;
1679
1680 // We should have already checked FirstMI for pair suppression and volatility.
1681 assert(!FirstMI.hasOrderedMemoryRef() &&
1682 !TII->isLdStPairSuppressed(FirstMI) &&
1683 "FirstMI shouldn't get here if either of these checks are true.");
1684
1685 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1687 return false;
1688
1689 unsigned OpcA = FirstMI.getOpcode();
1690 unsigned OpcB = MI.getOpcode();
1691
1692 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1693 if (OpcA == OpcB)
1694 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1695
1696 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1697 // allow pairing them with other instructions.
1698 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1699 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1700 return false;
1701
1702 // Two pre ld/st of different opcodes cannot be merged either
1704 return false;
1705
1706 // Try to match a sign-extended load/store with a zero-extended load/store.
1707 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1708 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1709 assert(IsValidLdStrOpc &&
1710 "Given Opc should be a Load or Store with an immediate");
1711 // OpcA will be the first instruction in the pair.
1712 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1713 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1714 return true;
1715 }
1716
1717 // If the second instruction isn't even a mergable/pairable load/store, bail
1718 // out.
1719 if (!PairIsValidLdStrOpc)
1720 return false;
1721
1722 // Narrow stores do not have a matching pair opcodes, so constrain their
1723 // merging to zero stores.
1724 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1725 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1726 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1727 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1728
1729 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1730 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1731 // are candidate pairs that can be merged.
1732 if (isPreLdStPairCandidate(FirstMI, MI))
1733 return true;
1734
1735 // Try to match an unscaled load/store with a scaled load/store.
1736 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1738
1739 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1740}
1741
1742static bool canRenameMOP(const MachineOperand &MOP,
1743 const TargetRegisterInfo *TRI) {
1744 if (MOP.isReg()) {
1745 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1746 // Renaming registers with multiple disjunct sub-registers (e.g. the
1747 // result of a LD3) means that all sub-registers are renamed, potentially
1748 // impacting other instructions we did not check. Bail out.
1749 // Note that this relies on the structure of the AArch64 register file. In
1750 // particular, a subregister cannot be written without overwriting the
1751 // whole register.
1752 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1753 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1754 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1755 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1756 LLVM_DEBUG(
1757 dbgs()
1758 << " Cannot rename operands with multiple disjunct subregisters ("
1759 << MOP << ")\n");
1760 return false;
1761 }
1762
1763 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1764 // them must be known. For example, in ORRWrs the implicit-def
1765 // corresponds to the result register.
1766 if (MOP.isImplicit() && MOP.isDef()) {
1767 if (!isRewritableImplicitDef(MOP))
1768 return false;
1769 return TRI->isSuperOrSubRegisterEq(
1770 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1771 }
1772 }
1773 return MOP.isImplicit() ||
1774 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1775}
1776
1777static bool
1780 const TargetRegisterInfo *TRI) {
1781 if (!FirstMI.mayStore())
1782 return false;
1783
1784 // Check if we can find an unused register which we can use to rename
1785 // the register used by the first load/store.
1786
1787 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1788 // For now, we only rename if the store operand gets killed at the store.
1789 if (!getLdStRegOp(FirstMI).isKill() &&
1790 !any_of(FirstMI.operands(),
1791 [TRI, RegToRename](const MachineOperand &MOP) {
1792 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1793 MOP.isImplicit() && MOP.isKill() &&
1794 TRI->regsOverlap(RegToRename, MOP.getReg());
1795 })) {
1796 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1797 return false;
1798 }
1799
1800 bool FoundDef = false;
1801
1802 // For each instruction between FirstMI and the previous def for RegToRename,
1803 // we
1804 // * check if we can rename RegToRename in this instruction
1805 // * collect the registers used and required register classes for RegToRename.
1806 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1807 bool IsDef) {
1808 LLVM_DEBUG(dbgs() << "Checking " << MI);
1809 // Currently we do not try to rename across frame-setup instructions.
1810 if (MI.getFlag(MachineInstr::FrameSetup)) {
1811 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1812 << "currently\n");
1813 return false;
1814 }
1815
1816 UsedInBetween.accumulate(MI);
1817
1818 // For a definition, check that we can rename the definition and exit the
1819 // loop.
1820 FoundDef = IsDef;
1821
1822 // For defs, check if we can rename the first def of RegToRename.
1823 if (FoundDef) {
1824 // For some pseudo instructions, we might not generate code in the end
1825 // (e.g. KILL) and we would end up without a correct def for the rename
1826 // register.
1827 // TODO: This might be overly conservative and we could handle those cases
1828 // in multiple ways:
1829 // 1. Insert an extra copy, to materialize the def.
1830 // 2. Skip pseudo-defs until we find an non-pseudo def.
1831 if (MI.isPseudo()) {
1832 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1833 return false;
1834 }
1835
1836 for (auto &MOP : MI.operands()) {
1837 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1838 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1839 continue;
1840 if (!canRenameMOP(MOP, TRI)) {
1841 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1842 return false;
1843 }
1844 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1845 }
1846 return true;
1847 } else {
1848 for (auto &MOP : MI.operands()) {
1849 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1850 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1851 continue;
1852
1853 if (!canRenameMOP(MOP, TRI)) {
1854 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1855 return false;
1856 }
1857 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1858 }
1859 }
1860 return true;
1861 };
1862
1863 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1864 return false;
1865
1866 if (!FoundDef) {
1867 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1868 return false;
1869 }
1870 return true;
1871}
1872
1873// We want to merge the second load into the first by rewriting the usages of
1874// the same reg between first (incl.) and second (excl.). We don't need to care
1875// about any insns before FirstLoad or after SecondLoad.
1876// 1. The second load writes new value into the same reg.
1877// - The renaming is impossible to impact later use of the reg.
1878// - The second load always trash the value written by the first load which
1879// means the reg must be killed before the second load.
1880// 2. The first load must be a def for the same reg so we don't need to look
1881// into anything before it.
// NOTE(review): the function signature line (canRenameUntilSecondLoad, source
// line 1882) and the RequiredClasses parameter (source line 1885) are not
// visible in this rendered excerpt; the parameter list below is incomplete
// here — verify against the original file.
1883 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1884 LiveRegUnits &UsedInBetween,
1886 const TargetRegisterInfo *TRI) {
  // A pseudo (e.g. KILL) may emit no code, so there would be no actual def of
  // the register to rename — conservatively refuse.
1887 if (FirstLoad.isPseudo())
1888 return false;
1889
1890 UsedInBetween.accumulate(FirstLoad);
1891 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  // Walk [FirstLoad, SecondLoad) and require every operand that overlaps
  // RegToRename to be renamable; record the register classes the eventual
  // rename register must be able to serve.
1892 bool Success = std::all_of(
1893 FirstLoad.getIterator(), SecondLoad.getIterator(),
1894 [&](MachineInstr &MI) {
1895 LLVM_DEBUG(dbgs() << "Checking " << MI);
1896 // Currently we do not try to rename across frame-setup instructions.
1897 if (MI.getFlag(MachineInstr::FrameSetup)) {
1898 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1899 << "currently\n");
1900 return false;
1901 }
1902
1903 for (auto &MOP : MI.operands()) {
1904 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1905 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1906 continue;
1907 if (!canRenameMOP(MOP, TRI)) {
1908 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1909 return false;
1910 }
1911 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1912 }
1913
1914 return true;
1915 });
1916 return Success;
1917}
1918
1919// Check if we can find a physical register for renaming \p Reg. This register
1920// must:
1921// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1922// defined registers up to the point where the renamed register will be used,
1923// * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1924// registers in the range the rename register will be used,
1925// * is available in all used register classes (checked using RequiredClasses).
1926static std::optional<MCPhysReg> tryToFindRegisterToRename(
1927 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1928 LiveRegUnits &UsedInBetween,
// NOTE(review): the RequiredClasses parameter (source line 1929) and the
// declaration of RegInfo (source line 1931, presumably
// `const MachineRegisterInfo &RegInfo = MF.getRegInfo();`) are not visible in
// this rendered excerpt — confirm against the original file.
1930 const TargetRegisterInfo *TRI) {
1932
1933 // Checks if any sub- or super-register of PR is callee saved.
1934 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1935 return any_of(TRI->sub_and_superregs_inclusive(PR),
1936 [&MF, TRI](MCPhysReg SubOrSuper) {
1937 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1938 });
1939 };
1940
1941 // Check if PR or one of its sub- or super-registers can be used for all
1942 // required register classes.
1943 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1944 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1945 return any_of(
1946 TRI->sub_and_superregs_inclusive(PR),
1947 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1948 });
1949 };
1950
  // Scan candidates from Reg's minimal class; the first register that is
  // unused, unreserved, not callee-saved, and usable for every required class
  // wins. It is immediately marked as defined in DefinedInBB so later queries
  // in this block will not hand it out again.
1951 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1952 for (const MCPhysReg &PR : *RegClass) {
1953 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1954 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1955 CanBeUsedForAllClasses(PR)) {
1956 DefinedInBB.addReg(PR);
1957 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1958 << "\n");
1959 return {PR};
1960 }
1961 }
1962 LLVM_DEBUG(dbgs() << "No rename register found from "
1963 << TRI->getRegClassName(RegClass) << "\n");
1964 return std::nullopt;
1965}
1966
1967// For store pairs: returns a register from FirstMI to the beginning of the
1968// block that can be renamed.
1969// For load pairs: returns a register from FirstMI to MI that can be renamed.
1970static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1971 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1972 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
// NOTE(review): the RequiredClasses parameter (source line 1973) is not
// visible in this rendered excerpt.
1974 const TargetRegisterInfo *TRI) {
1975 std::optional<MCPhysReg> RenameReg;
  // Debug counter gate: allows bisecting renaming decisions.
1976 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1977 return RenameReg;
1978
  // Renaming needs accurate liveness; bail if it is not tracked.
1979 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1980 MachineFunction &MF = *FirstMI.getParent()->getParent();
1981 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1982 return RenameReg;
1983
1984 const bool IsLoad = FirstMI.mayLoad();
1985
  // Compute (and cache in MaybeCanRename) whether renaming is feasible at
  // all: loads check the range [FirstMI, MI), stores check back to the
  // previous def.
1986 if (!MaybeCanRename) {
1987 if (IsLoad)
1988 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1989 RequiredClasses, TRI)};
1990 else
1991 MaybeCanRename = {
1992 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1993 }
1994
1995 if (*MaybeCanRename) {
1996 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1997 RequiredClasses, TRI);
1998 }
1999 return RenameReg;
2000}
2001
2002/// Scan the instructions looking for a load/store that can be combined with the
2003/// current instruction into a wider equivalent or a load/store pair.
/// Returns an iterator to the matching instruction, or the block end iterator
/// if none is found. On success \p Flags describes how to merge (forward or
/// backward, optional rename register, sign-extend index).
// NOTE(review): several source lines are absent from this rendered excerpt
// (e.g. the return type line 2004, the MBBI declaration 2009, BaseReg/Offset
// initialisation around 2017-2018, the candidate checks on 2055 and
// 2063-2064, operands of the availability checks on 2096/2098/2104, and the
// dbgs() call on 2155) — verify against the original file before editing.
2005AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
2006 LdStPairFlags &Flags, unsigned Limit,
2007 bool FindNarrowMerge) {
2008 MachineBasicBlock::iterator E = I->getParent()->end();
2010 MachineBasicBlock::iterator MBBIWithRenameReg;
2011 MachineInstr &FirstMI = *I;
2012 MBBI = next_nodbg(MBBI, E);
2013
2014 bool MayLoad = FirstMI.mayLoad();
2015 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
2016 Register Reg = getLdStRegOp(FirstMI).getReg();
2019 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
2020 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
2021
2022 std::optional<bool> MaybeCanRename;
2023 if (!EnableRenaming)
2024 MaybeCanRename = {false};
2025
2026 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2027 LiveRegUnits UsedInBetween;
2028 UsedInBetween.init(*TRI);
2029
2030 Flags.clearRenameReg();
2031
2032 // Track which register units have been modified and used between the first
2033 // insn (inclusive) and the second insn.
2034 ModifiedRegUnits.clear();
2035 UsedRegUnits.clear();
2036
2037 // Remember any instructions that read/write memory between FirstMI and MI.
2038 SmallVector<MachineInstr *, 4> MemInsns;
2039
2040 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2041 for (unsigned Count = 0; MBBI != E && Count < Limit;
2042 MBBI = next_nodbg(MBBI, E)) {
2043 MachineInstr &MI = *MBBI;
2044 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2045
2046 UsedInBetween.accumulate(MI);
2047
2048 // Don't count transient instructions towards the search limit since there
2049 // may be different numbers of them if e.g. debug information is present.
2050 if (!MI.isTransient())
2051 ++Count;
2052
2053 Flags.setSExtIdx(-1);
2054 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2056 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2057 // If we've found another instruction with the same opcode, check to see
2058 // if the base and offset are compatible with our starting instruction.
2059 // These instructions all have scaled immediate operands, so we just
2060 // check for +1/-1. Make sure to check the new instruction offset is
2061 // actually an immediate and not a symbolic reference destined for
2062 // a relocation.
2065 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2066 if (IsUnscaled != MIIsUnscaled) {
2067 // We're trying to pair instructions that differ in how they are scaled.
2068 // If FirstMI is scaled then scale the offset of MI accordingly.
2069 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2070 int MemSize = TII->getMemScale(MI);
2071 if (MIIsUnscaled) {
2072 // If the unscaled offset isn't a multiple of the MemSize, we can't
2073 // pair the operations together: bail and keep looking.
2074 if (MIOffset % MemSize) {
2075 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2076 UsedRegUnits, TRI);
2077 MemInsns.push_back(&MI);
2078 continue;
2079 }
2080 MIOffset /= MemSize;
2081 } else {
2082 MIOffset *= MemSize;
2083 }
2084 }
2085
2086 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2087
2088 if (BaseReg == MIBaseReg) {
2089 // If the offset of the second ld/st is not equal to the size of the
2090 // destination register it can’t be paired with a pre-index ld/st
2091 // pair. Additionally if the base reg is used or modified the operations
2092 // can't be paired: bail and keep looking.
2093 if (IsPreLdSt) {
2094 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2095 bool IsBaseRegUsed = !UsedRegUnits.available(
2097 bool IsBaseRegModified = !ModifiedRegUnits.available(
2099 // If the stored value and the address of the second instruction is
2100 // the same, it needs to be using the updated register and therefore
2101 // it must not be folded.
2102 bool IsMIRegTheSame =
2103 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2105 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2106 IsMIRegTheSame) {
2107 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2108 UsedRegUnits, TRI);
2109 MemInsns.push_back(&MI);
2110 continue;
2111 }
2112 } else {
  // Non-pre-indexed candidates must be exactly adjacent accesses
  // (offsets differ by one stride) to form a pair.
2113 if ((Offset != MIOffset + OffsetStride) &&
2114 (Offset + OffsetStride != MIOffset)) {
2115 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2116 UsedRegUnits, TRI);
2117 MemInsns.push_back(&MI);
2118 continue;
2119 }
2120 }
2121
2122 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2123 if (FindNarrowMerge) {
2124 // If the alignment requirements of the scaled wide load/store
2125 // instruction can't express the offset of the scaled narrow input,
2126 // bail and keep looking. For promotable zero stores, allow only when
2127 // the stored value is the same (i.e., WZR).
2128 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2129 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2130 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2131 UsedRegUnits, TRI);
2132 MemInsns.push_back(&MI);
2133 continue;
2134 }
2135 } else {
2136 // Pairwise instructions have a 7-bit signed offset field. Single
2137 // insns have a 12-bit unsigned offset field. If the resultant
2138 // immediate offset of merging these instructions is out of range for
2139 // a pairwise instruction, bail and keep looking.
2140 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2141 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2142 UsedRegUnits, TRI);
2143 MemInsns.push_back(&MI);
2144 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2145 << "keep looking.\n");
2146 continue;
2147 }
2148 // If the alignment requirements of the paired (scaled) instruction
2149 // can't express the offset of the unscaled input, bail and keep
2150 // looking.
2151 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2152 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2153 UsedRegUnits, TRI);
2154 MemInsns.push_back(&MI);
2156 << "Offset doesn't fit due to alignment requirements, "
2157 << "keep looking.\n");
2158 continue;
2159 }
2160 }
2161
2162 // If the BaseReg has been modified, then we cannot do the optimization.
2163 // For example, in the following pattern
2164 // ldr x1 [x2]
2165 // ldr x2 [x3]
2166 // ldr x4 [x2, #8],
2167 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2168 if (!ModifiedRegUnits.available(BaseReg))
2169 return E;
2170
2171 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2173
2174 // If the Rt of the second instruction (destination register of the
2175 // load) was not modified or used between the two instructions and none
2176 // of the instructions between the second and first alias with the
2177 // second, we can combine the second into the first.
2178 bool RtNotModified =
2179 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2180 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2181 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2182
2183 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2184 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2185 << (RtNotModified ? "true" : "false") << "\n"
2186 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2187 << (RtNotUsed ? "true" : "false") << "\n");
2188
2189 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2190 // For pairs loading into the same reg, try to find a renaming
2191 // opportunity to allow the renaming of Reg between FirstMI and MI
2192 // and combine MI into FirstMI; otherwise bail and keep looking.
2193 if (SameLoadReg) {
2194 std::optional<MCPhysReg> RenameReg =
2195 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2196 Reg, DefinedInBB, UsedInBetween,
2197 RequiredClasses, TRI);
2198 if (!RenameReg) {
2199 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2200 UsedRegUnits, TRI);
2201 MemInsns.push_back(&MI);
2202 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2203 << "keep looking.\n");
2204 continue;
2205 }
2206 Flags.setRenameReg(*RenameReg);
2207 }
2208
2209 Flags.setMergeForward(false);
2210 if (!SameLoadReg)
2211 Flags.clearRenameReg();
2212 return MBBI;
2213 }
2214
2215 // Likewise, if the Rt of the first instruction is not modified or used
2216 // between the two instructions and none of the instructions between the
2217 // first and the second alias with the first, we can combine the first
2218 // into the second.
2219 RtNotModified = !(
2220 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2221
2222 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2223 << "Reg '" << getLdStRegOp(FirstMI)
2224 << "' not modified: "
2225 << (RtNotModified ? "true" : "false") << "\n");
2226
2227 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2228 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2229 Flags.setMergeForward(true);
2230 Flags.clearRenameReg();
2231 return MBBI;
2232 }
2233
  // First insn's Rt was clobbered in between: a forward merge is still
  // possible if we can rename it. Remember this position but keep
  // scanning — a rename-free match later would be preferable.
2234 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2235 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2236 RequiredClasses, TRI);
2237 if (RenameReg) {
2238 Flags.setMergeForward(true);
2239 Flags.setRenameReg(*RenameReg);
2240 MBBIWithRenameReg = MBBI;
2241 }
2242 }
2243 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2244 << "interference in between, keep looking.\n");
2245 }
2246 }
2247
2248 if (Flags.getRenameReg())
2249 return MBBIWithRenameReg;
2250
2251 // If the instruction wasn't a matching load or store. Stop searching if we
2252 // encounter a call instruction that might modify memory.
2253 if (MI.isCall()) {
2254 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2255 return E;
2256 }
2257
2258 // Update modified / uses register units.
2259 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2260
2261 // Otherwise, if the base register is modified, we have no match, so
2262 // return early.
2263 if (!ModifiedRegUnits.available(BaseReg)) {
2264 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2265 return E;
2266 }
2267
2268 // Update list of instructions that read/write memory.
2269 if (MI.mayLoadOrStore())
2270 MemInsns.push_back(&MI);
2271 }
2272 return E;
2273}
2274
// Decide whether the CFI instruction (if any) immediately following an SP
// add/sub that is part of frame setup/destroy should accompany the update when
// it is merged; returns the CFI iterator when it must move, block end
// otherwise.
// NOTE(review): the function signature (source lines 2275-2276, presumably
// `static MachineBasicBlock::iterator maybeMoveCFI(...)` given the call site
// in mergeUpdateInsn) and the case labels of the switch (source lines
// 2292-2293, CFA-offset-related operations) are not visible in this rendered
// excerpt — verify against the original file.
2277 assert((MI.getOpcode() == AArch64::SUBXri ||
2278 MI.getOpcode() == AArch64::ADDXri) &&
2279 "Expected a register update instruction");
2280 auto End = MI.getParent()->end();
  // Only relevant for a CFI right after a frame-setup/destroy SP update.
2281 if (MaybeCFI == End ||
2282 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2283 !(MI.getFlag(MachineInstr::FrameSetup) ||
2284 MI.getFlag(MachineInstr::FrameDestroy)) ||
2285 MI.getOperand(0).getReg() != AArch64::SP)
2286 return End;
2287
2288 const MachineFunction &MF = *MI.getParent()->getParent();
2289 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2290 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2291 switch (CFI.getOperation()) {
2294 return MaybeCFI;
2295 default:
2296 return End;
2297 }
2298}
2299
// Fold the base-register add/sub \p Update into the memory instruction \p I,
// producing a pre- or post-indexed load/store. Returns the iterator following
// the merged instruction, or std::nullopt if merging would reorder CFI.
// NOTE(review): a few source lines are absent from this rendered excerpt
// (2301 with the I/Update parameters, 2307 with the NextI initialisation,
// 2345 with the post-indexed opcode alternative, 2349 with the paired-insn
// test, and 2355/2366 with the base-register operands of the builders) —
// verify against the original file before editing.
2300std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2302 bool IsForward, bool IsPreIdx, bool MergeEither) {
2303 assert((Update->getOpcode() == AArch64::ADDXri ||
2304 Update->getOpcode() == AArch64::SUBXri) &&
2305 "Unexpected base register update instruction to merge!");
2306 MachineBasicBlock::iterator E = I->getParent()->end();
2308
2309 // If updating the SP and the following instruction is CFA offset related CFI,
2310 // make sure the CFI follows the SP update either by merging at the location
2311 // of the update or by moving the CFI after the merged instruction. If unable
2312 // to do so, bail.
2313 MachineBasicBlock::iterator InsertPt = I;
2314 if (IsForward) {
2315 assert(IsPreIdx);
2316 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2317 if (MergeEither) {
2318 InsertPt = Update;
2319 } else {
2320 // Take care not to reorder CFIs.
2321 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2322 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2323 }))
2324 return std::nullopt;
2325
2326 MachineBasicBlock *MBB = InsertPt->getParent();
2327 MBB->splice(std::next(InsertPt), MBB, CFI);
2328 }
2329 }
2330 }
2331
2332 // Return the instruction following the merged instruction, which is
2333 // the instruction following our unmerged load. Unless that's the add/sub
2334 // instruction we're merging, in which case it's the one after that.
2335 if (NextI == Update)
2336 NextI = next_nodbg(NextI, E);
2337
2338 int Value = Update->getOperand(2).getImm();
2339 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2340 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  // SUBXri decrements the base, so the writeback immediate is negated.
2341 if (Update->getOpcode() == AArch64::SUBXri)
2342 Value = -Value;
2343
2344 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2346 MachineInstrBuilder MIB;
2347 int Scale, MinOffset, MaxOffset;
2348 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2350 // Non-paired instruction.
2351 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2352 TII->get(NewOpc))
2353 .add(Update->getOperand(0))
2354 .add(getLdStRegOp(*I))
2356 .addImm(Value / Scale)
2357 .setMemRefs(I->memoperands())
2358 .setMIFlags(I->mergeFlagsWith(*Update));
2359 } else {
2360 // Paired instruction.
2361 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2362 TII->get(NewOpc))
2363 .add(Update->getOperand(0))
2364 .add(getLdStRegOp(*I, 0))
2365 .add(getLdStRegOp(*I, 1))
2367 .addImm(Value / Scale)
2368 .setMemRefs(I->memoperands())
2369 .setMIFlags(I->mergeFlagsWith(*Update));
2370 }
2371
2372 if (IsPreIdx) {
2373 ++NumPreFolded;
2374 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2375 } else {
2376 ++NumPostFolded;
2377 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2378 }
2379 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2380 LLVM_DEBUG(I->print(dbgs()));
2381 LLVM_DEBUG(dbgs() << " ");
2382 LLVM_DEBUG(Update->print(dbgs()));
2383 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2384 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2385 LLVM_DEBUG(dbgs() << "\n");
2386
2387 // Erase the old instructions for the block.
2388 I->eraseFromParent();
2389 Update->eraseFromParent();
2390
2391 return NextI;
2392}
2393
// Rewrite a load/store whose large constant offset was materialised via
// MOVZ/MOVK into an ADD of the high part plus a load/store carrying the low
// 12-bit (scaled) immediate; erases the mov pair and the original access.
// NOTE(review): a few source lines are absent from this rendered excerpt
// (2394 with the return type, 2396 with the Update parameter, 2401 with the
// NextI initialisation, 2407-2408 with the BaseReg/IndexReg extraction, and
// 2423 with the index-register operand of the builder) — verify against the
// original file before editing.
2395AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2397 unsigned Offset, int Scale) {
2398 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2399 "Unexpected const mov instruction to merge!");
2400 MachineBasicBlock::iterator E = I->getParent()->end();
  // PrevI is the MOVZWi feeding the MOVKWi; both are erased below.
2402 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2403 MachineInstr &MemMI = *I;
  // Split Offset into a low part expressible as a scaled 12-bit immediate and
  // the remaining high part handled by the shifted ADD.
2404 unsigned Mask = (1 << 12) * Scale - 1;
2405 unsigned Low = Offset & Mask;
2406 unsigned High = Offset - Low;
2409 MachineInstrBuilder AddMIB, MemMIB;
2410
2411 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2412 AddMIB =
2413 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2414 .addDef(IndexReg)
2415 .addUse(BaseReg)
2416 .addImm(High >> 12) // shifted value
2417 .addImm(12); // shift 12
2418 (void)AddMIB;
2419 // Ld/St DestReg, IndexReg, Imm12
2420 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2421 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2422 .add(getLdStRegOp(MemMI))
2424 .addImm(Low / Scale)
2425 .setMemRefs(I->memoperands())
2426 .setMIFlags(I->mergeFlagsWith(*Update));
2427 (void)MemMIB;
2428
2429 ++NumConstOffsetFolded;
2430 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2431 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2432 LLVM_DEBUG(PrevI->print(dbgs()));
2433 LLVM_DEBUG(dbgs() << " ");
2434 LLVM_DEBUG(Update->print(dbgs()));
2435 LLVM_DEBUG(dbgs() << " ");
2436 LLVM_DEBUG(I->print(dbgs()));
2437 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2438 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2439 LLVM_DEBUG(dbgs() << " ");
2440 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2441 LLVM_DEBUG(dbgs() << "\n");
2442
2443 // Erase the old instructions for the block.
2444 I->eraseFromParent();
2445 PrevI->eraseFromParent();
2446 Update->eraseFromParent();
2447
2448 return NextI;
2449}
2450
2451bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2452 MachineInstr &MI,
2453 unsigned BaseReg, int Offset) {
2454 switch (MI.getOpcode()) {
2455 default:
2456 break;
2457 case AArch64::SUBXri:
2458 case AArch64::ADDXri:
2459 // Make sure it's a vanilla immediate operand, not a relocation or
2460 // anything else we can't handle.
2461 if (!MI.getOperand(2).isImm())
2462 break;
2463 // Watch out for 1 << 12 shifted value.
2464 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2465 break;
2466
2467 // The update instruction source and destination register must be the
2468 // same as the load/store base register.
2469 if (MI.getOperand(0).getReg() != BaseReg ||
2470 MI.getOperand(1).getReg() != BaseReg)
2471 break;
2472
2473 int UpdateOffset = MI.getOperand(2).getImm();
2474 if (MI.getOpcode() == AArch64::SUBXri)
2475 UpdateOffset = -UpdateOffset;
2476
2477 // The immediate must be a multiple of the scaling factor of the pre/post
2478 // indexed instruction.
2479 int Scale, MinOffset, MaxOffset;
2480 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2481 if (UpdateOffset % Scale != 0)
2482 break;
2483
2484 // Scaled offset must fit in the instruction immediate.
2485 int ScaledOffset = UpdateOffset / Scale;
2486 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2487 break;
2488
2489 // If we have a non-zero Offset, we check that it matches the amount
2490 // we're adding to the register.
2491 if (!Offset || Offset == UpdateOffset)
2492 return true;
2493 break;
2494 }
2495 return false;
2496}
2497
// Check whether \p MI is the MOVKWi half of a MOVZWi+MOVKWi pair that
// materialises the large constant offset held in \p IndexReg for \p MemMI;
// on success stores the reconstructed constant in \p Offset and returns true.
// NOTE(review): the declaration of MBBI (source line 2509, presumably the
// iterator of MI) is not visible in this rendered excerpt — verify against
// the original file before editing.
2498bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2499 MachineInstr &MI,
2500 unsigned IndexReg,
2501 unsigned &Offset) {
2502 // The update instruction source and destination register must be the
2503 // same as the load/store index register.
2504 if (MI.getOpcode() == AArch64::MOVKWi &&
2505 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2506
2507 // movz + movk hold a large offset of a Ld/St instruction.
2508 MachineBasicBlock::iterator B = MI.getParent()->begin();
2510 // Skip the scene when the MI is the first instruction of a block.
2511 if (MBBI == B)
2512 return false;
2513 MBBI = prev_nodbg(MBBI, B);
2514 MachineInstr &MovzMI = *MBBI;
2515 // Make sure the MOVKWi and MOVZWi set the same register.
2516 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2517 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2518 unsigned Low = MovzMI.getOperand(1).getImm();
2519 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2520 Offset = High + Low;
2521 // 12-bit optionally shifted immediates are legal for adds.
2522 return Offset >> 24 == 0;
2523 }
2524 }
2525 return false;
2526}
2527
// Scan forward from the memory instruction \p I, looking for a base-register
// add/sub of \p UnscaledOffset that can be folded into a post-indexed form.
// Returns the update instruction's iterator, or the block end if none found.
// NOTE(review): source lines 2532 and 2534 (presumably the MBBI and BaseReg
// initialisations) are not visible in this rendered excerpt — verify against
// the original file before editing.
2528MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2529 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2530 MachineBasicBlock::iterator E = I->getParent()->end();
2531 MachineInstr &MemMI = *I;
2533
2535 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2536 TII->getMemScale(MemMI);
2537
2538 // Scan forward looking for post-index opportunities. Updating instructions
2539 // can't be formed if the memory instruction doesn't have the offset we're
2540 // looking for.
2541 if (MIUnscaledOffset != UnscaledOffset)
2542 return E;
2543
2544 // If the base register overlaps a source/destination register, we can't
2545 // merge the update. This does not apply to tag store instructions which
2546 // ignore the address part of the source register.
2547 // This does not apply to STGPi as well, which does not have unpredictable
2548 // behavior in this case unlike normal stores, and always performs writeback
2549 // after reading the source register value.
2550 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2551 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2552 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2553 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2554 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2555 return E;
2556 }
2557 }
2558
2559 // Track which register units have been modified and used between the first
2560 // insn (inclusive) and the second insn.
2561 ModifiedRegUnits.clear();
2562 UsedRegUnits.clear();
2563 MBBI = next_nodbg(MBBI, E);
2564
2565 // We can't post-increment the stack pointer if any instruction between
2566 // the memory access (I) and the increment (MBBI) can access the memory
2567 // region defined by [SP, MBBI].
2568 const bool BaseRegSP = BaseReg == AArch64::SP;
2569 if (BaseRegSP && needsWinCFI(I->getMF())) {
2570 // FIXME: For now, we always block the optimization over SP in windows
2571 // targets as it requires to adjust the unwind/debug info, messing up
2572 // the unwind info can actually cause a miscompile.
2573 return E;
2574 }
2575
2576 unsigned Count = 0;
2577 MachineBasicBlock *CurMBB = I->getParent();
2578 // choice of next block to visit is liveins-based
2579 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2580
  // Outer loop: walk the current block, then optionally descend into a
  // single-predecessor successor where BaseReg is live-in, repeating until
  // the limit is exhausted or the search dead-ends.
2581 while (true) {
2582 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2583 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2584 MachineInstr &MI = *MBBI;
2585
2586 // Don't count transient instructions towards the search limit since there
2587 // may be different numbers of them if e.g. debug information is present.
2588 if (!MI.isTransient())
2589 ++Count;
2590
2591 // If we found a match, return it.
2592 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2593 return MBBI;
2594
2595 // Update the status of what the instruction clobbered and used.
2596 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2597 TRI);
2598
2599 // Otherwise, if the base register is used or modified, we have no match,
2600 // so return early. If we are optimizing SP, do not allow instructions
2601 // that may load or store in between the load and the optimized value
2602 // update.
2603 if (!ModifiedRegUnits.available(BaseReg) ||
2604 !UsedRegUnits.available(BaseReg) ||
2605 (BaseRegSP && MBBI->mayLoadOrStore()))
2606 return E;
2607 }
2608
2609 if (!VisitSucc || Limit <= Count)
2610 break;
2611
2612 // Try to go downward to successors along a CF path w/o side enters
2613 // such that BaseReg is alive along it but not at its exits
2614 MachineBasicBlock *SuccToVisit = nullptr;
2615 unsigned LiveSuccCount = 0;
2616 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2617 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2618 if (Succ->isLiveIn(*AI)) {
  // More than one successor needs BaseReg: the value diverges and
  // the update cannot be moved — give up entirely.
2619 if (LiveSuccCount++)
2620 return E;
2621 if (Succ->pred_size() == 1)
2622 SuccToVisit = Succ;
2623 break;
2624 }
2625 }
2626 }
2627 if (!SuccToVisit)
2628 break;
2629 CurMBB = SuccToVisit;
2630 MBBI = CurMBB->begin();
2631 }
2632
2633 return E;
2634}
2635
// Scan backward from the memory instruction \p I, looking for a base-register
// add/sub that can be folded into a pre-indexed form. Returns the update
// instruction's iterator, or the block end if none found. \p MergeEither is
// set to false as soon as merging at the update's position would be unsafe.
// NOTE(review): source lines 2641 and 2644-2645 (presumably the MBBI,
// BaseReg and Offset initialisations) are not visible in this rendered
// excerpt — verify against the original file before editing.
2636MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2637 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2638 MachineBasicBlock::iterator B = I->getParent()->begin();
2639 MachineBasicBlock::iterator E = I->getParent()->end();
2640 MachineInstr &MemMI = *I;
2642 MachineFunction &MF = *MemMI.getMF();
2643
2646
2647 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2648 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2649 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2650 : AArch64::NoRegister};
2651
2652 // If the load/store is the first instruction in the block, there's obviously
2653 // not any matching update. Ditto if the memory offset isn't zero.
2654 if (MBBI == B || Offset != 0)
2655 return E;
2656 // If the base register overlaps a destination register, we can't
2657 // merge the update.
2658 if (!isTagStore(MemMI)) {
2659 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2660 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2661 return E;
2662 }
2663
2664 const bool BaseRegSP = BaseReg == AArch64::SP;
2665 if (BaseRegSP && needsWinCFI(I->getMF())) {
2666 // FIXME: For now, we always block the optimization over SP in windows
2667 // targets as it requires to adjust the unwind/debug info, messing up
2668 // the unwind info can actually cause a miscompile.
2669 return E;
2670 }
2671
  // Red-zone size bounds how far SP may be pre-decremented when a memory
  // access was seen between the update and the access (checked on match).
2672 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2673 unsigned RedZoneSize =
2674 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2675
2676 // Track which register units have been modified and used between the first
2677 // insn (inclusive) and the second insn.
2678 ModifiedRegUnits.clear();
2679 UsedRegUnits.clear();
2680 unsigned Count = 0;
2681 bool MemAccessBeforeSPPreInc = false;
2682 MergeEither = true;
2683 do {
2684 MBBI = prev_nodbg(MBBI, B);
2685 MachineInstr &MI = *MBBI;
2686
2687 // Don't count transient instructions towards the search limit since there
2688 // may be different numbers of them if e.g. debug information is present.
2689 if (!MI.isTransient())
2690 ++Count;
2691
2692 // If we found a match, return it.
2693 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2694 // Check that the update value is within our red zone limit (which may be
2695 // zero).
2696 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2697 return E;
2698 return MBBI;
2699 }
2700
2701 // Update the status of what the instruction clobbered and used.
2702 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2703
2704 // Otherwise, if the base register is used or modified, we have no match, so
2705 // return early.
2706 if (!ModifiedRegUnits.available(BaseReg) ||
2707 !UsedRegUnits.available(BaseReg))
2708 return E;
2709
2710 // If we have a destination register (i.e. a load instruction) and a
2711 // destination register is used or modified, then we can only merge forward,
2712 // i.e. the combined instruction is put in the place of the memory
2713 // instruction. Same applies if we see a memory access or side effects.
2714 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2715 (DestReg[0] != AArch64::NoRegister &&
2716 !(ModifiedRegUnits.available(DestReg[0]) &&
2717 UsedRegUnits.available(DestReg[0]))) ||
2718 (DestReg[1] != AArch64::NoRegister &&
2719 !(ModifiedRegUnits.available(DestReg[1]) &&
2720 UsedRegUnits.available(DestReg[1]))))
2721 MergeEither = false;
2722
2723 // Keep track if we have a memory access before an SP pre-increment, in this
2724 // case we need to validate later that the update amount respects the red
2725 // zone.
2726 if (BaseRegSP && MBBI->mayLoadOrStore())
2727 MemAccessBeforeSPPreInc = true;
2728 } while (MBBI != B && Count < Limit);
2729 return E;
2730}
2731
2733AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2734 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2735 MachineBasicBlock::iterator B = I->getParent()->begin();
2736 MachineBasicBlock::iterator E = I->getParent()->end();
2737 MachineInstr &MemMI = *I;
2739
2740 // If the load is the first instruction in the block, there's obviously
2741 // not any matching load or store.
2742 if (MBBI == B)
2743 return E;
2744
2745 // Make sure the IndexReg is killed and the shift amount is zero.
2746 // TODO: Relex this restriction to extend, simplify processing now.
2747 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2748 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2750 return E;
2751
2753
2754 // Track which register units have been modified and used between the first
2755 // insn (inclusive) and the second insn.
2756 ModifiedRegUnits.clear();
2757 UsedRegUnits.clear();
2758 unsigned Count = 0;
2759 do {
2760 MBBI = prev_nodbg(MBBI, B);
2761 MachineInstr &MI = *MBBI;
2762
2763 // Don't count transient instructions towards the search limit since there
2764 // may be different numbers of them if e.g. debug information is present.
2765 if (!MI.isTransient())
2766 ++Count;
2767
2768 // If we found a match, return it.
2769 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2770 return MBBI;
2771 }
2772
2773 // Update the status of what the instruction clobbered and used.
2774 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2775
2776 // Otherwise, if the index register is used or modified, we have no match,
2777 // so return early.
2778 if (!ModifiedRegUnits.available(IndexReg) ||
2779 !UsedRegUnits.available(IndexReg))
2780 return E;
2781
2782 } while (MBBI != B && Count < Limit);
2783 return E;
2784}
2785
2786bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2788 MachineInstr &MI = *MBBI;
2789 // If this is a volatile load, don't mess with it.
2790 if (MI.hasOrderedMemoryRef())
2791 return false;
2792
2793 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2794 return false;
2795
2796 // Make sure this is a reg+imm.
2797 // FIXME: It is possible to extend it to handle reg+reg cases.
2799 return false;
2800
2801 // Look backward up to LdStLimit instructions.
2803 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2804 ++NumLoadsFromStoresPromoted;
2805 // Promote the load. Keeping the iterator straight is a
2806 // pain, so we let the merge routine tell us what the next instruction
2807 // is after it's done mucking about.
2808 MBBI = promoteLoadFromStore(MBBI, StoreI);
2809 return true;
2810 }
2811 return false;
2812}
2813
2814// Merge adjacent zero stores into a wider store.
2815bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2817 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2818 MachineInstr &MI = *MBBI;
2819 MachineBasicBlock::iterator E = MI.getParent()->end();
2820
2821 if (!TII->isCandidateToMergeOrPair(MI))
2822 return false;
2823
2824 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2825 LdStPairFlags Flags;
2827 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2828 if (MergeMI != E) {
2829 ++NumZeroStoresPromoted;
2830
2831 // Keeping the iterator straight is a pain, so we let the merge routine tell
2832 // us what the next instruction is after it's done mucking about.
2833 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2834 return true;
2835 }
2836 return false;
2837}
2838
2839// Find loads and stores that can be merged into a single load or store pair
2840// instruction.
2841bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2842 MachineInstr &MI = *MBBI;
2843 MachineBasicBlock::iterator E = MI.getParent()->end();
2844
2845 if (!TII->isCandidateToMergeOrPair(MI))
2846 return false;
2847
2848 // If disable-ldp feature is opted, do not emit ldp.
2849 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2850 return false;
2851
2852 // If disable-stp feature is opted, do not emit stp.
2853 if (MI.mayStore() && Subtarget->hasDisableStp())
2854 return false;
2855
2856 // Early exit if the offset is not possible to match. (6 bits of positive
2857 // range, plus allow an extra one in case we find a later insn that matches
2858 // with Offset-1)
2859 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2861 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2862 // Allow one more for offset.
2863 if (Offset > 0)
2864 Offset -= OffsetStride;
2865 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2866 return false;
2867
2868 // Look ahead up to LdStLimit instructions for a pairable instruction.
2869 LdStPairFlags Flags;
2871 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2872 if (Paired != E) {
2873 // Keeping the iterator straight is a pain, so we let the merge routine tell
2874 // us what the next instruction is after it's done mucking about.
2875 auto Prev = std::prev(MBBI);
2876
2877 // Fetch the memoperand of the load/store that is a candidate for
2878 // combination.
2879 MachineMemOperand *MemOp =
2880 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2881
2882 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2883 // that the alignment of the source pointer is at least double the alignment
2884 // of the type.
2885 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2886 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2887 // If there is no size/align information, cancel the transformation.
2888 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2889 NumFailedAlignmentCheck++;
2890 return false;
2891 }
2892
2893 // Get the needed alignments to check them if
2894 // ldp-aligned-only/stp-aligned-only features are opted.
2895 uint64_t MemAlignment = MemOp->getAlign().value();
2896 uint64_t TypeAlignment =
2897 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2898
2899 if (MemAlignment < 2 * TypeAlignment) {
2900 NumFailedAlignmentCheck++;
2901 return false;
2902 }
2903 }
2904
2905 ++NumPairCreated;
2906 if (TII->hasUnscaledLdStOffset(MI))
2907 ++NumUnscaledPairCreated;
2908
2909 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2910 // Collect liveness info for instructions between Prev and the new position
2911 // MBBI.
2912 for (auto I = std::next(Prev); I != MBBI; I++)
2913 updateDefinedRegisters(*I, DefinedInBB, TRI);
2914
2915 return true;
2916 }
2917 return false;
2918}
2919
2920bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2922 MachineInstr &MI = *MBBI;
2923 MachineBasicBlock::iterator E = MI.getParent()->end();
2925
2926 // Look forward to try to form a post-index instruction. For example,
2927 // ldr x0, [x20]
2928 // add x20, x20, #32
2929 // merged into:
2930 // ldr x0, [x20], #32
2931 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2932 if (Update != E) {
2933 // Merge the update into the ld/st.
2934 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2935 /*IsPreIdx=*/false,
2936 /*MergeEither=*/false)) {
2937 MBBI = *NextI;
2938 return true;
2939 }
2940 }
2941
2942 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2943 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2944 return false;
2945
2946 // Look back to try to find a pre-index instruction. For example,
2947 // add x0, x0, #8
2948 // ldr x1, [x0]
2949 // merged into:
2950 // ldr x1, [x0, #8]!
2951 bool MergeEither;
2952 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2953 if (Update != E) {
2954 // Merge the update into the ld/st.
2955 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2956 /*IsPreIdx=*/true, MergeEither)) {
2957 MBBI = *NextI;
2958 return true;
2959 }
2960 }
2961
2962 // The immediate in the load/store is scaled by the size of the memory
2963 // operation. The immediate in the add we're looking for,
2964 // however, is not, so adjust here.
2965 int UnscaledOffset =
2967
2968 // Look forward to try to find a pre-index instruction. For example,
2969 // ldr x1, [x0, #64]
2970 // add x0, x0, #64
2971 // merged into:
2972 // ldr x1, [x0, #64]!
2973 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2974 if (Update != E) {
2975 // Merge the update into the ld/st.
2976 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2977 /*IsPreIdx=*/true,
2978 /*MergeEither=*/false)) {
2979 MBBI = *NextI;
2980 return true;
2981 }
2982 }
2983
2984 return false;
2985}
2986
2987bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2988 int Scale) {
2989 MachineInstr &MI = *MBBI;
2990 MachineBasicBlock::iterator E = MI.getParent()->end();
2992
2993 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2994 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2995 return false;
2996
2997 // Look back to try to find a const offset for index LdSt instruction. For
2998 // example,
2999 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3000 // ldr x1, [x0, x8]
3001 // merged into:
3002 // add x8, x0, a * (1<<12)
3003 // ldr x1, [x8, imm12]
3004 unsigned Offset;
3005 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
3006 if (Update != E && (Offset & (Scale - 1)) == 0) {
3007 // Merge the imm12 into the ld/st.
3008 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
3009 return true;
3010 }
3011
3012 return false;
3013}
3014
3015bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
3016 bool EnableNarrowZeroStOpt) {
3017 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
3018
3019 bool Modified = false;
3020 // Four transformations to do here:
3021 // 1) Find loads that directly read from stores and promote them by
3022 // replacing with mov instructions. If the store is wider than the load,
3023 // the load will be replaced with a bitfield extract.
3024 // e.g.,
3025 // str w1, [x0, #4]
3026 // ldrh w2, [x0, #6]
3027 // ; becomes
3028 // str w1, [x0, #4]
3029 // lsr w2, w1, #16
3031 MBBI != E;) {
3032 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3033 Modified = true;
3034 else
3035 ++MBBI;
3036 }
3037 // 2) Merge adjacent zero stores into a wider store.
3038 // e.g.,
3039 // strh wzr, [x0]
3040 // strh wzr, [x0, #2]
3041 // ; becomes
3042 // str wzr, [x0]
3043 // e.g.,
3044 // str wzr, [x0]
3045 // str wzr, [x0, #4]
3046 // ; becomes
3047 // str xzr, [x0]
3048 if (EnableNarrowZeroStOpt)
3050 MBBI != E;) {
3051 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3052 Modified = true;
3053 else
3054 ++MBBI;
3055 }
3056 // 3) Find loads and stores that can be merged into a single load or store
3057 // pair instruction.
3058 // When compiling for SVE 128, also try to combine SVE fill/spill
3059 // instructions into LDP/STP.
3060 // e.g.,
3061 // ldr x0, [x2]
3062 // ldr x1, [x2, #8]
3063 // ; becomes
3064 // ldp x0, x1, [x2]
3065 // e.g.,
3066 // ldr z0, [x2]
3067 // ldr z1, [x2, #1, mul vl]
3068 // ; becomes
3069 // ldp q0, q1, [x2]
3070
3072 DefinedInBB.clear();
3073 DefinedInBB.addLiveIns(MBB);
3074 }
3075
3077 MBBI != E;) {
3078 // Track currently live registers up to this point, to help with
3079 // searching for a rename register on demand.
3080 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3081 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3082 Modified = true;
3083 else
3084 ++MBBI;
3085 }
3086 // 4) Find base register updates that can be merged into the load or store
3087 // as a base-reg writeback.
3088 // e.g.,
3089 // ldr x0, [x2]
3090 // add x2, x2, #4
3091 // ; becomes
3092 // ldr x0, [x2], #4
3094 MBBI != E;) {
3095 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3096 Modified = true;
3097 else
3098 ++MBBI;
3099 }
3100
3101 // 5) Find a register assigned with a const value that can be combined with
3102 // into the load or store. e.g.,
3103 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3104 // ldr x1, [x0, x8]
3105 // ; becomes
3106 // add x8, x0, a * (1<<12)
3107 // ldr x1, [x8, imm12]
3109 MBBI != E;) {
3110 int Scale;
3111 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3112 Modified = true;
3113 else
3114 ++MBBI;
3115 }
3116
3117 return Modified;
3118}
3119
3120bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3121 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3122 TII = Subtarget->getInstrInfo();
3123 TRI = Subtarget->getRegisterInfo();
3124
3125 // Resize the modified and used register unit trackers. We do this once
3126 // per function and then clear the register units each time we optimize a load
3127 // or store.
3128 ModifiedRegUnits.init(*TRI);
3129 UsedRegUnits.init(*TRI);
3130 DefinedInBB.init(*TRI);
3131
3132 bool Modified = false;
3133 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3134 for (auto &MBB : Fn) {
3135 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3136 Modified |= M;
3137 }
3138
3139 return Modified;
3140}
3141
3142// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3143// stores near one another? Note: The pre-RA instruction scheduler already has
3144// hooks to try and schedule pairable loads/stores together to improve pairing
3145// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3146
3147// FIXME: When pairing store instructions it's very possible for this pass to
3148// hoist a store with a KILL marker above another use (without a KILL marker).
3149// The resulting IR is invalid, but nothing uses the KILL markers after this
3150// pass, so it's never caused a problem in practice.
3151
3152bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3153 if (skipFunction(MF.getFunction()))
3154 return false;
3155 AArch64LoadStoreOpt Impl;
3156 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3157 return Impl.runOnMachineFunction(MF);
3158}
3159
3160/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3161/// load / store optimization pass.
3163 return new AArch64LoadStoreOptLegacy();
3164}
3165
3169 AArch64LoadStoreOpt Impl;
3171 .getManager()
3172 .getResult<AAManager>(MF.getFunction());
3173 bool Changed = Impl.runOnMachineFunction(MF);
3174 if (!Changed)
3175 return PreservedAnalyses::all();
3178 return PA;
3179}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
Definition MCAsmInfo.h:655
OpType getOperation() const
Definition MCDwarf.h:714
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.