1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the relative order of any two instructions if one
15// of them has the FrameSetup/FrameDestroy flag, or, alternatively, it must
16// apply an ad-hoc fix to the unwind information.
17//
18//===----------------------------------------------------------------------===//
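//
// Illustrative sketch (not from the original source) of the main rewrites this
// pass performs, assuming the accesses are adjacent and independent:
//
//   ldr w0, [x2]                ldp w0, w1, [x2]       ; pair formation
//   ldr w1, [x2, #4]      =>
//
//   str w0, [x3]                str w0, [x3], #8       ; post-index folding
//   add x3, x3, #8        =>
//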
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringRef.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCDwarf.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
45#include <cassert>
46#include <cstdint>
47#include <functional>
48#include <iterator>
49#include <limits>
50#include <optional>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "aarch64-ldst-opt"
55
56STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
57STATISTIC(NumPostFolded, "Number of post-index updates folded");
58STATISTIC(NumPreFolded, "Number of pre-index updates folded");
59STATISTIC(NumUnscaledPairCreated,
60 "Number of load/store pairs generated from unscaled instructions");
61STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
62STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
63STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
64 "that did not pass the alignment check");
65STATISTIC(NumConstOffsetFolded,
66 "Number of constant offsets of index addresses folded");
67
68DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
69 "Controls which pairs are considered for renaming");
70
71// The LdStLimit limits how far we search for load/store pairs.
72static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
73 cl::init(20), cl::Hidden);
74
75// The UpdateLimit limits how far we search for update instructions when we form
76// pre-/post-index instructions.
77static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
78 cl::Hidden);
79
80// The LdStConstLimit limits how far we search for const offset instructions
81// when we form index address load/store instructions.
82static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
83 cl::init(10), cl::Hidden);
84
85// Enable register renaming to find additional store pairing opportunities.
86static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
87 cl::init(true), cl::Hidden);
88
89#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
90
91namespace {
92
93using LdStPairFlags = struct LdStPairFlags {
94 // If a matching instruction is found, MergeForward is set to true if the
95 // merge is to remove the first instruction and replace the second with
96 // a pair-wise insn, and false if the reverse is true.
97 bool MergeForward = false;
98
99 // SExtIdx gives the index of the result of the load pair that must be
100 // extended. The value of SExtIdx assumes that the paired load produces the
101 // value in this order: (I, returned iterator), i.e., -1 means no value has
102 // to be extended, 0 means I, and 1 means the returned iterator.
103 int SExtIdx = -1;
104
105 // If not none, RenameReg can be used to rename the result register of the
106 // first store in a pair. Currently this only works when merging stores
107 // forward.
108 std::optional<MCPhysReg> RenameReg;
109
110 LdStPairFlags() = default;
111
112 void setMergeForward(bool V = true) { MergeForward = V; }
113 bool getMergeForward() const { return MergeForward; }
114
115 void setSExtIdx(int V) { SExtIdx = V; }
116 int getSExtIdx() const { return SExtIdx; }
117
118 void setRenameReg(MCPhysReg R) { RenameReg = R; }
119 void clearRenameReg() { RenameReg = std::nullopt; }
120 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
121};
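// Illustrative note (not from the original source): when a sign-extending load
// is paired with a zero-extending one, SExtIdx records which result of the new
// LDP still needs the extension. For example, assuming x0 points at two
// adjacent words:
//   ldr   w2, [x0]                  ldp  w2, w1, [x0]
//   ldrsw x1, [x0, #4]       =>     sbfm x1, x1, #0, #31   ; re-apply the sext
// Here the second LDP result needs the SBFM fix-up; see mergePairedInsns.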
122
123struct AArch64LoadStoreOpt : public MachineFunctionPass {
124 static char ID;
125
126 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
127
128 AliasAnalysis *AA;
129 const AArch64InstrInfo *TII;
130 const TargetRegisterInfo *TRI;
131 const AArch64Subtarget *Subtarget;
132
133 // Track which register units have been modified and used.
134 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
135 LiveRegUnits DefinedInBB;
136
137 void getAnalysisUsage(AnalysisUsage &AU) const override {
138 AU.addRequired<AAResultsWrapperPass>();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }
141
142 // Scan the instructions looking for a load/store that can be combined
143 // with the current instruction into a load/store pair.
144 // Return the matching instruction if one is found, else MBB->end().
145 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
146 LdStPairFlags &Flags,
147 unsigned Limit,
148 bool FindNarrowMerge);
149
150 // Scan the instructions looking for a store that writes to the address from
151 // which the current load instruction reads. Return true if one is found.
152 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
153 MachineBasicBlock::iterator &StoreI);
154
155 // Merge the two indicated narrow zero stores into a single wider store.
156 MachineBasicBlock::iterator
157 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
158 MachineBasicBlock::iterator MergeMI,
159 const LdStPairFlags &Flags);
160
161 // Merge the two instructions indicated into a single pair-wise instruction.
162 MachineBasicBlock::iterator
163 mergePairedInsns(MachineBasicBlock::iterator I,
164 MachineBasicBlock::iterator Paired,
165 const LdStPairFlags &Flags);
166
167 // Promote the load that reads directly from the address stored to.
168 MachineBasicBlock::iterator
169 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
170 MachineBasicBlock::iterator StoreI);
171
172 // Scan the instruction list to find a base register update that can
173 // be combined with the current instruction (a load or store) using
174 // pre or post indexed addressing with writeback. Scan forwards.
175 MachineBasicBlock::iterator
176 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
177 int UnscaledOffset, unsigned Limit);
178
179 // Scan the instruction list to find a register assigned with a const
180 // value that can be combined with the current instruction (a load or store)
181 // using base addressing with writeback. Scan backwards.
182 MachineBasicBlock::iterator
183 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
184 unsigned &Offset);
185
186 // Scan the instruction list to find a base register update that can
187 // be combined with the current instruction (a load or store) using
188 // pre or post indexed addressing with writeback. Scan backwards.
189 // `MergeEither` is set to true if the combined instruction may be placed
190 // either at the location of the load/store instruction or at the location of
191 // the update instruction.
192 MachineBasicBlock::iterator
193 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
194 bool &MergeEither);
195
196 // Find an instruction that updates the base register of the ld/st
197 // instruction.
198 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
199 unsigned BaseReg, int Offset);
200
201 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
202 unsigned IndexReg, unsigned &Offset);
203
204 // Merge a pre- or post-index base register update into a ld/st instruction.
205 std::optional<MachineBasicBlock::iterator>
206 mergeUpdateInsn(MachineBasicBlock::iterator I,
207 MachineBasicBlock::iterator Update, bool IsForward,
208 bool IsPreIdx, bool MergeEither);
209
210 MachineBasicBlock::iterator
211 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
212 MachineBasicBlock::iterator Update, unsigned Offset,
213 int Scale);
214
215 // Find and merge zero store instructions.
216 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
217
218 // Find and pair ldr/str instructions.
219 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
220
221 // Find and promote load instructions which read directly from store.
222 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
223
224 // Find and merge base register updates before or after a ld/st instruction.
225 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
226
227 // Find and merge an index ldr/st instruction into a base ld/st instruction.
228 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
229
230 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
231
232 bool runOnMachineFunction(MachineFunction &Fn) override;
233
234 MachineFunctionProperties getRequiredProperties() const override {
235 return MachineFunctionProperties().setNoVRegs();
236 }
237
238 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
239};
240
241char AArch64LoadStoreOpt::ID = 0;
242
243} // end anonymous namespace
244
245INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
246 AARCH64_LOAD_STORE_OPT_NAME, false, false)
247
248static bool isNarrowStore(unsigned Opc) {
249 switch (Opc) {
250 default:
251 return false;
252 case AArch64::STRBBui:
253 case AArch64::STURBBi:
254 case AArch64::STRHHui:
255 case AArch64::STURHHi:
256 return true;
257 }
258}
259
260// These instructions set the memory tag and either keep the memory contents
261// unchanged or set them to zero, ignoring the address part of the source register.
262static bool isTagStore(const MachineInstr &MI) {
263 switch (MI.getOpcode()) {
264 default:
265 return false;
266 case AArch64::STGi:
267 case AArch64::STZGi:
268 case AArch64::ST2Gi:
269 case AArch64::STZ2Gi:
270 return true;
271 }
272}
273
274static unsigned getMatchingNonSExtOpcode(unsigned Opc,
275 bool *IsValidLdStrOpc = nullptr) {
276 if (IsValidLdStrOpc)
277 *IsValidLdStrOpc = true;
278 switch (Opc) {
279 default:
280 if (IsValidLdStrOpc)
281 *IsValidLdStrOpc = false;
282 return std::numeric_limits<unsigned>::max();
283 case AArch64::STRDui:
284 case AArch64::STURDi:
285 case AArch64::STRDpre:
286 case AArch64::STRQui:
287 case AArch64::STURQi:
288 case AArch64::STRQpre:
289 case AArch64::STRBBui:
290 case AArch64::STURBBi:
291 case AArch64::STRHHui:
292 case AArch64::STURHHi:
293 case AArch64::STRWui:
294 case AArch64::STRWpre:
295 case AArch64::STURWi:
296 case AArch64::STRXui:
297 case AArch64::STRXpre:
298 case AArch64::STURXi:
299 case AArch64::STR_ZXI:
300 case AArch64::LDRDui:
301 case AArch64::LDURDi:
302 case AArch64::LDRDpre:
303 case AArch64::LDRQui:
304 case AArch64::LDURQi:
305 case AArch64::LDRQpre:
306 case AArch64::LDRWui:
307 case AArch64::LDURWi:
308 case AArch64::LDRWpre:
309 case AArch64::LDRXui:
310 case AArch64::LDURXi:
311 case AArch64::LDRXpre:
312 case AArch64::STRSui:
313 case AArch64::STURSi:
314 case AArch64::STRSpre:
315 case AArch64::LDRSui:
316 case AArch64::LDURSi:
317 case AArch64::LDRSpre:
318 case AArch64::LDR_ZXI:
319 return Opc;
320 case AArch64::LDRSWui:
321 return AArch64::LDRWui;
322 case AArch64::LDURSWi:
323 return AArch64::LDURWi;
324 case AArch64::LDRSWpre:
325 return AArch64::LDRWpre;
326 }
327}
328
329static unsigned getMatchingWideOpcode(unsigned Opc) {
330 switch (Opc) {
331 default:
332 llvm_unreachable("Opcode has no wide equivalent!");
333 case AArch64::STRBBui:
334 return AArch64::STRHHui;
335 case AArch64::STRHHui:
336 return AArch64::STRWui;
337 case AArch64::STURBBi:
338 return AArch64::STURHHi;
339 case AArch64::STURHHi:
340 return AArch64::STURWi;
341 case AArch64::STURWi:
342 return AArch64::STURXi;
343 case AArch64::STRWui:
344 return AArch64::STRXui;
345 }
346}
347
348static unsigned getMatchingPairOpcode(unsigned Opc) {
349 switch (Opc) {
350 default:
351 llvm_unreachable("Opcode has no pairwise equivalent!");
352 case AArch64::STRSui:
353 case AArch64::STURSi:
354 return AArch64::STPSi;
355 case AArch64::STRSpre:
356 return AArch64::STPSpre;
357 case AArch64::STRDui:
358 case AArch64::STURDi:
359 return AArch64::STPDi;
360 case AArch64::STRDpre:
361 return AArch64::STPDpre;
362 case AArch64::STRQui:
363 case AArch64::STURQi:
364 case AArch64::STR_ZXI:
365 return AArch64::STPQi;
366 case AArch64::STRQpre:
367 return AArch64::STPQpre;
368 case AArch64::STRWui:
369 case AArch64::STURWi:
370 return AArch64::STPWi;
371 case AArch64::STRWpre:
372 return AArch64::STPWpre;
373 case AArch64::STRXui:
374 case AArch64::STURXi:
375 return AArch64::STPXi;
376 case AArch64::STRXpre:
377 return AArch64::STPXpre;
378 case AArch64::LDRSui:
379 case AArch64::LDURSi:
380 return AArch64::LDPSi;
381 case AArch64::LDRSpre:
382 return AArch64::LDPSpre;
383 case AArch64::LDRDui:
384 case AArch64::LDURDi:
385 return AArch64::LDPDi;
386 case AArch64::LDRDpre:
387 return AArch64::LDPDpre;
388 case AArch64::LDRQui:
389 case AArch64::LDURQi:
390 case AArch64::LDR_ZXI:
391 return AArch64::LDPQi;
392 case AArch64::LDRQpre:
393 return AArch64::LDPQpre;
394 case AArch64::LDRWui:
395 case AArch64::LDURWi:
396 return AArch64::LDPWi;
397 case AArch64::LDRWpre:
398 return AArch64::LDPWpre;
399 case AArch64::LDRXui:
400 case AArch64::LDURXi:
401 return AArch64::LDPXi;
402 case AArch64::LDRXpre:
403 return AArch64::LDPXpre;
404 case AArch64::LDRSWui:
405 case AArch64::LDURSWi:
406 return AArch64::LDPSWi;
407 case AArch64::LDRSWpre:
408 return AArch64::LDPSWpre;
409 }
410}
411
412static bool isMatchingStore(MachineInstr &LoadInst,
413 MachineInstr &StoreInst) {
414 unsigned LdOpc = LoadInst.getOpcode();
415 unsigned StOpc = StoreInst.getOpcode();
416 switch (LdOpc) {
417 default:
418 llvm_unreachable("Unsupported load instruction!");
419 case AArch64::LDRBBui:
420 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
421 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
422 case AArch64::LDURBBi:
423 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
424 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
425 case AArch64::LDRHHui:
426 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
427 StOpc == AArch64::STRXui;
428 case AArch64::LDURHHi:
429 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
430 StOpc == AArch64::STURXi;
431 case AArch64::LDRWui:
432 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
433 case AArch64::LDURWi:
434 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
435 case AArch64::LDRXui:
436 return StOpc == AArch64::STRXui;
437 case AArch64::LDURXi:
438 return StOpc == AArch64::STURXi;
439 }
440}
441
442static unsigned getPreIndexedOpcode(unsigned Opc) {
443 // FIXME: We don't currently support creating pre-indexed loads/stores when
444 // the load or store is the unscaled version. If we decide to perform such an
445 // optimization in the future the cases for the unscaled loads/stores will
446 // need to be added here.
447 switch (Opc) {
448 default:
449 llvm_unreachable("Opcode has no pre-indexed equivalent!");
450 case AArch64::STRSui:
451 return AArch64::STRSpre;
452 case AArch64::STRDui:
453 return AArch64::STRDpre;
454 case AArch64::STRQui:
455 return AArch64::STRQpre;
456 case AArch64::STRBBui:
457 return AArch64::STRBBpre;
458 case AArch64::STRHHui:
459 return AArch64::STRHHpre;
460 case AArch64::STRWui:
461 return AArch64::STRWpre;
462 case AArch64::STRXui:
463 return AArch64::STRXpre;
464 case AArch64::LDRSui:
465 return AArch64::LDRSpre;
466 case AArch64::LDRDui:
467 return AArch64::LDRDpre;
468 case AArch64::LDRQui:
469 return AArch64::LDRQpre;
470 case AArch64::LDRBBui:
471 return AArch64::LDRBBpre;
472 case AArch64::LDRHHui:
473 return AArch64::LDRHHpre;
474 case AArch64::LDRWui:
475 return AArch64::LDRWpre;
476 case AArch64::LDRXui:
477 return AArch64::LDRXpre;
478 case AArch64::LDRSWui:
479 return AArch64::LDRSWpre;
480 case AArch64::LDPSi:
481 return AArch64::LDPSpre;
482 case AArch64::LDPSWi:
483 return AArch64::LDPSWpre;
484 case AArch64::LDPDi:
485 return AArch64::LDPDpre;
486 case AArch64::LDPQi:
487 return AArch64::LDPQpre;
488 case AArch64::LDPWi:
489 return AArch64::LDPWpre;
490 case AArch64::LDPXi:
491 return AArch64::LDPXpre;
492 case AArch64::STPSi:
493 return AArch64::STPSpre;
494 case AArch64::STPDi:
495 return AArch64::STPDpre;
496 case AArch64::STPQi:
497 return AArch64::STPQpre;
498 case AArch64::STPWi:
499 return AArch64::STPWpre;
500 case AArch64::STPXi:
501 return AArch64::STPXpre;
502 case AArch64::STGi:
503 return AArch64::STGPreIndex;
504 case AArch64::STZGi:
505 return AArch64::STZGPreIndex;
506 case AArch64::ST2Gi:
507 return AArch64::ST2GPreIndex;
508 case AArch64::STZ2Gi:
509 return AArch64::STZ2GPreIndex;
510 case AArch64::STGPi:
511 return AArch64::STGPpre;
512 }
513}
514
515static unsigned getBaseAddressOpcode(unsigned Opc) {
516 // TODO: Add more index address stores.
517 switch (Opc) {
518 default:
519 llvm_unreachable("Opcode has no base address equivalent!");
520 case AArch64::LDRBroX:
521 return AArch64::LDRBui;
522 case AArch64::LDRBBroX:
523 return AArch64::LDRBBui;
524 case AArch64::LDRSBXroX:
525 return AArch64::LDRSBXui;
526 case AArch64::LDRSBWroX:
527 return AArch64::LDRSBWui;
528 case AArch64::LDRHroX:
529 return AArch64::LDRHui;
530 case AArch64::LDRHHroX:
531 return AArch64::LDRHHui;
532 case AArch64::LDRSHXroX:
533 return AArch64::LDRSHXui;
534 case AArch64::LDRSHWroX:
535 return AArch64::LDRSHWui;
536 case AArch64::LDRWroX:
537 return AArch64::LDRWui;
538 case AArch64::LDRSroX:
539 return AArch64::LDRSui;
540 case AArch64::LDRSWroX:
541 return AArch64::LDRSWui;
542 case AArch64::LDRDroX:
543 return AArch64::LDRDui;
544 case AArch64::LDRXroX:
545 return AArch64::LDRXui;
546 case AArch64::LDRQroX:
547 return AArch64::LDRQui;
548 }
549}
550
551static unsigned getPostIndexedOpcode(unsigned Opc) {
552 switch (Opc) {
553 default:
554 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
555 case AArch64::STRSui:
556 case AArch64::STURSi:
557 return AArch64::STRSpost;
558 case AArch64::STRDui:
559 case AArch64::STURDi:
560 return AArch64::STRDpost;
561 case AArch64::STRQui:
562 case AArch64::STURQi:
563 return AArch64::STRQpost;
564 case AArch64::STRBBui:
565 return AArch64::STRBBpost;
566 case AArch64::STRHHui:
567 return AArch64::STRHHpost;
568 case AArch64::STRWui:
569 case AArch64::STURWi:
570 return AArch64::STRWpost;
571 case AArch64::STRXui:
572 case AArch64::STURXi:
573 return AArch64::STRXpost;
574 case AArch64::LDRSui:
575 case AArch64::LDURSi:
576 return AArch64::LDRSpost;
577 case AArch64::LDRDui:
578 case AArch64::LDURDi:
579 return AArch64::LDRDpost;
580 case AArch64::LDRQui:
581 case AArch64::LDURQi:
582 return AArch64::LDRQpost;
583 case AArch64::LDRBBui:
584 return AArch64::LDRBBpost;
585 case AArch64::LDRHHui:
586 return AArch64::LDRHHpost;
587 case AArch64::LDRWui:
588 case AArch64::LDURWi:
589 return AArch64::LDRWpost;
590 case AArch64::LDRXui:
591 case AArch64::LDURXi:
592 return AArch64::LDRXpost;
593 case AArch64::LDRSWui:
594 return AArch64::LDRSWpost;
595 case AArch64::LDPSi:
596 return AArch64::LDPSpost;
597 case AArch64::LDPSWi:
598 return AArch64::LDPSWpost;
599 case AArch64::LDPDi:
600 return AArch64::LDPDpost;
601 case AArch64::LDPQi:
602 return AArch64::LDPQpost;
603 case AArch64::LDPWi:
604 return AArch64::LDPWpost;
605 case AArch64::LDPXi:
606 return AArch64::LDPXpost;
607 case AArch64::STPSi:
608 return AArch64::STPSpost;
609 case AArch64::STPDi:
610 return AArch64::STPDpost;
611 case AArch64::STPQi:
612 return AArch64::STPQpost;
613 case AArch64::STPWi:
614 return AArch64::STPWpost;
615 case AArch64::STPXi:
616 return AArch64::STPXpost;
617 case AArch64::STGi:
618 return AArch64::STGPostIndex;
619 case AArch64::STZGi:
620 return AArch64::STZGPostIndex;
621 case AArch64::ST2Gi:
622 return AArch64::ST2GPostIndex;
623 case AArch64::STZ2Gi:
624 return AArch64::STZ2GPostIndex;
625 case AArch64::STGPi:
626 return AArch64::STGPpost;
627 }
628}
629
630static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
631
632 unsigned OpcA = FirstMI.getOpcode();
633 unsigned OpcB = MI.getOpcode();
634
635 switch (OpcA) {
636 default:
637 return false;
638 case AArch64::STRSpre:
639 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
640 case AArch64::STRDpre:
641 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
642 case AArch64::STRQpre:
643 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
644 case AArch64::STRWpre:
645 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
646 case AArch64::STRXpre:
647 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
648 case AArch64::LDRSpre:
649 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
650 case AArch64::LDRDpre:
651 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
652 case AArch64::LDRQpre:
653 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
654 case AArch64::LDRWpre:
655 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
656 case AArch64::LDRXpre:
657 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
658 case AArch64::LDRSWpre:
659 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
660 }
661}
662
663// Returns the scale and offset range of pre/post indexed variants of MI.
664static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
665 int &MinOffset, int &MaxOffset) {
666 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
667 bool IsTagStore = isTagStore(MI);
668 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
669 // as in the "unsigned offset" variant.
670 // All other pre/post indexed ldst instructions are unscaled.
671 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
672
673 if (IsPaired) {
674 MinOffset = -64;
675 MaxOffset = 63;
676 } else {
677 MinOffset = -256;
678 MaxOffset = 255;
679 }
680}
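// Worked example (illustrative, not from the original source): for STPXi
// (paired, scale 8) the writeback immediate therefore covers byte offsets
// -512..504 in steps of 8; for a plain STRXui it is the unscaled byte range
// -256..255; and for STGi (tag store, scale 16) it is -4096..4080 in steps
// of 16.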
681
682static MachineOperand &getLdStRegOp(MachineInstr &MI,
683 unsigned PairedRegOp = 0) {
684 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
685 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
686 if (IsPreLdSt)
687 PairedRegOp += 1;
688 unsigned Idx =
689 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
690 return MI.getOperand(Idx);
691}
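// Illustrative note (not from the original source) on the operand layout this
// helper relies on: for a plain LDRXui the data register is operand 0; for a
// pre-indexed LDRXpre, operand 0 is the write-back def of the base register,
// so the data register is operand 1; for paired forms the two data registers
// follow in the same way and are selected by PairedRegOp.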
692
693static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
694 MachineInstr &StoreInst,
695 const AArch64InstrInfo *TII) {
696 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
697 int LoadSize = TII->getMemScale(LoadInst);
698 int StoreSize = TII->getMemScale(StoreInst);
699 int UnscaledStOffset =
700 TII->hasUnscaledLdStOffset(StoreInst)
701 ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
702 : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
703 int UnscaledLdOffset =
704 TII->hasUnscaledLdStOffset(LoadInst)
705 ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
706 : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
707 return (UnscaledStOffset <= UnscaledLdOffset) &&
708 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
709}
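// Worked example (illustrative, not from the original source): for
//   str x1, [x0, #16]   ; bytes [16, 24)
//   ldr w2, [x0, #20]   ; bytes [20, 24)
// the checks 16 <= 20 and 20 + 4 <= 16 + 8 both hold, so the loaded value can
// later be extracted from the stored register instead of being reloaded.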
710
711static bool isPromotableZeroStoreInst(MachineInstr &MI) {
712 unsigned Opc = MI.getOpcode();
713 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
714 isNarrowStore(Opc)) &&
715 getLdStRegOp(MI).getReg() == AArch64::WZR;
716}
717
718static bool isPromotableLoadFromStore(MachineInstr &MI) {
719 switch (MI.getOpcode()) {
720 default:
721 return false;
722 // Scaled instructions.
723 case AArch64::LDRBBui:
724 case AArch64::LDRHHui:
725 case AArch64::LDRWui:
726 case AArch64::LDRXui:
727 // Unscaled instructions.
728 case AArch64::LDURBBi:
729 case AArch64::LDURHHi:
730 case AArch64::LDURWi:
731 case AArch64::LDURXi:
732 return true;
733 }
734}
735
736static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
737 unsigned Opc = MI.getOpcode();
738 switch (Opc) {
739 default:
740 return false;
741 // Scaled instructions.
742 case AArch64::STRSui:
743 case AArch64::STRDui:
744 case AArch64::STRQui:
745 case AArch64::STRXui:
746 case AArch64::STRWui:
747 case AArch64::STRHHui:
748 case AArch64::STRBBui:
749 case AArch64::LDRSui:
750 case AArch64::LDRDui:
751 case AArch64::LDRQui:
752 case AArch64::LDRXui:
753 case AArch64::LDRWui:
754 case AArch64::LDRHHui:
755 case AArch64::LDRBBui:
756 case AArch64::STGi:
757 case AArch64::STZGi:
758 case AArch64::ST2Gi:
759 case AArch64::STZ2Gi:
760 case AArch64::STGPi:
761 // Unscaled instructions.
762 case AArch64::STURSi:
763 case AArch64::STURDi:
764 case AArch64::STURQi:
765 case AArch64::STURWi:
766 case AArch64::STURXi:
767 case AArch64::LDURSi:
768 case AArch64::LDURDi:
769 case AArch64::LDURQi:
770 case AArch64::LDURWi:
771 case AArch64::LDURXi:
772 // Paired instructions.
773 case AArch64::LDPSi:
774 case AArch64::LDPSWi:
775 case AArch64::LDPDi:
776 case AArch64::LDPQi:
777 case AArch64::LDPWi:
778 case AArch64::LDPXi:
779 case AArch64::STPSi:
780 case AArch64::STPDi:
781 case AArch64::STPQi:
782 case AArch64::STPWi:
783 case AArch64::STPXi:
784 // Make sure this is a reg+imm (as opposed to an address reloc).
785 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
786 return false;
787
788 // When using stack tagging, simple sp+imm loads and stores are not
789 // tag-checked, but pre- and post-indexed versions of them are, so we can't
790 // replace the former with the latter. This transformation would be valid
791 // if the load/store accesses an untagged stack slot, but we don't have
792 // that information available after frame indices have been eliminated.
793 if (AFI.isMTETagged() &&
794 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
795 return false;
796
797 return true;
798 }
799}
800
801// Make sure this is a reg+reg Ld/St
802static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
803 unsigned Opc = MI.getOpcode();
804 switch (Opc) {
805 default:
806 return false;
807 // Scaled instructions.
808 // TODO: Add more index address stores.
809 case AArch64::LDRBroX:
810 case AArch64::LDRBBroX:
811 case AArch64::LDRSBXroX:
812 case AArch64::LDRSBWroX:
813 Scale = 1;
814 return true;
815 case AArch64::LDRHroX:
816 case AArch64::LDRHHroX:
817 case AArch64::LDRSHXroX:
818 case AArch64::LDRSHWroX:
819 Scale = 2;
820 return true;
821 case AArch64::LDRWroX:
822 case AArch64::LDRSroX:
823 case AArch64::LDRSWroX:
824 Scale = 4;
825 return true;
826 case AArch64::LDRDroX:
827 case AArch64::LDRXroX:
828 Scale = 8;
829 return true;
830 case AArch64::LDRQroX:
831 Scale = 16;
832 return true;
833 }
834}
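// Illustrative sketch (not from the original source) of the rewrite these
// reg+reg forms feed into (tryToMergeIndexLdSt), assuming the index register
// is defined by a nearby constant move and is not otherwise needed:
//   mov x9, #12
//   ldr w0, [x1, x9, lsl #2]    =>    ldr w0, [x1, #48]
// The constant is folded through the element scale (here 4) into the
// immediate of the matching reg+imm opcode from getBaseAddressOpcode.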
835
836static bool isRewritableImplicitDef(unsigned Opc) {
837 switch (Opc) {
838 default:
839 return false;
840 case AArch64::ORRWrs:
841 case AArch64::ADDWri:
842 return true;
843 }
844}
845
846MachineBasicBlock::iterator
847AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
848 MachineBasicBlock::iterator MergeMI,
849 const LdStPairFlags &Flags) {
850 assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
851 "Expected promotable zero stores.");
852
853 MachineBasicBlock::iterator E = I->getParent()->end();
854 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
855 // If NextI is the second of the two instructions to be merged, we need
856 // to skip one further. Either way, the merge will invalidate the iterator,
857 // and we don't need to scan the new instruction, as it's a pairwise
858 // instruction, which we're not considering for further action anyway.
859 if (NextI == MergeMI)
860 NextI = next_nodbg(NextI, E);
861
862 unsigned Opc = I->getOpcode();
863 unsigned MergeMIOpc = MergeMI->getOpcode();
864 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
865 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
866 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
867 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
868
869 bool MergeForward = Flags.getMergeForward();
870 // Insert our new paired instruction after whichever of the paired
871 // instructions MergeForward indicates.
872 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
873 // Also based on MergeForward is from where we copy the base register operand
874 // so we get the flags compatible with the input code.
875 const MachineOperand &BaseRegOp =
876 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
877 : AArch64InstrInfo::getLdStBaseOp(*I);
878
879 // Which register is Rt and which is Rt2 depends on the offset order.
880 int64_t IOffsetInBytes =
881 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
882 int64_t MIOffsetInBytes =
884 MergeMIOffsetStride;
885 // Select final offset based on the offset order.
886 int64_t OffsetImm;
887 if (IOffsetInBytes > MIOffsetInBytes)
888 OffsetImm = MIOffsetInBytes;
889 else
890 OffsetImm = IOffsetInBytes;
891
892 int NewOpcode = getMatchingWideOpcode(Opc);
893 // Adjust final offset on scaled stores because the new instruction
894 // has a different scale.
895 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
896 int NewOffsetStride = TII->getMemScale(NewOpcode);
897 assert(((OffsetImm % NewOffsetStride) == 0) &&
898 "Offset should be a multiple of the store memory scale");
899 OffsetImm = OffsetImm / NewOffsetStride;
900 }
901
902 // Construct the new instruction.
903 DebugLoc DL = I->getDebugLoc();
904 MachineBasicBlock *MBB = I->getParent();
905 MachineInstrBuilder MIB;
906 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
907 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
908 .add(BaseRegOp)
909 .addImm(OffsetImm)
910 .cloneMergedMemRefs({&*I, &*MergeMI})
911 .setMIFlags(I->mergeFlagsWith(*MergeMI));
912 (void)MIB;
913
914 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
915 LLVM_DEBUG(I->print(dbgs()));
916 LLVM_DEBUG(dbgs() << " ");
917 LLVM_DEBUG(MergeMI->print(dbgs()));
918 LLVM_DEBUG(dbgs() << " with instruction:\n ");
919 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
920 LLVM_DEBUG(dbgs() << "\n");
921
922 // Erase the old instructions.
923 I->eraseFromParent();
924 MergeMI->eraseFromParent();
925 return NextI;
926}
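// Worked example (illustrative, not from the original source): two adjacent
// narrow zero stores
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// become the single wider zero store
//   str  wzr, [x0]
// The lower of the two byte offsets is kept and rescaled to the wider opcode.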
927
928// Apply Fn to all instructions between MI and the beginning of the block, until
929// a def for DefReg is reached. Returns true iff Fn returns true for all
930// visited instructions. Stops after visiting Limit instructions.
931static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
932 const TargetRegisterInfo *TRI, unsigned Limit,
933 std::function<bool(MachineInstr &, bool)> &Fn) {
934 auto MBB = MI.getParent();
935 for (MachineInstr &I :
936 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
937 if (!Limit)
938 return false;
939 --Limit;
940
941 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
942 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
943 TRI->regsOverlap(MOP.getReg(), DefReg);
944 });
945 if (!Fn(I, isDef))
946 return false;
947 if (isDef)
948 break;
949 }
950 return true;
951}
952
953static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
954 const TargetRegisterInfo *TRI) {
955
956 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
957 if (MOP.isReg() && MOP.isKill())
958 Units.removeReg(MOP.getReg());
959
960 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
961 if (MOP.isReg() && !MOP.isKill())
962 Units.addReg(MOP.getReg());
963}
964
965/// This function will add a new entry into the debugValueSubstitutions table
966/// when two instructions have been merged into a new one represented by \p
967/// MergedInstr.
968static void addDebugSubstitutionsToTable(MachineFunction *MF,
969 unsigned InstrNumToSet,
970 MachineInstr &OriginalInstr,
971 MachineInstr &MergedInstr) {
972
973 // Figure out the Operand Index of the destination register of the
974 // OriginalInstr in the new MergedInstr.
975 auto Reg = OriginalInstr.getOperand(0).getReg();
976 unsigned OperandNo = 0;
977 bool RegFound = false;
978 for (const auto Op : MergedInstr.operands()) {
979 if (Op.getReg() == Reg) {
980 RegFound = true;
981 break;
982 }
983 OperandNo++;
984 }
985
986 if (RegFound)
987 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
988 {InstrNumToSet, OperandNo});
989}
990
991MachineBasicBlock::iterator
992AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
993 MachineBasicBlock::iterator Paired,
994 const LdStPairFlags &Flags) {
995 MachineBasicBlock::iterator E = I->getParent()->end();
996 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
997 // If NextI is the second of the two instructions to be merged, we need
998 // to skip one further. Either way, the merge will invalidate the iterator,
999 // and we don't need to scan the new instruction, as it's a pairwise
1000 // instruction, which we're not considering for further action anyway.
1001 if (NextI == Paired)
1002 NextI = next_nodbg(NextI, E);
1003
1004 int SExtIdx = Flags.getSExtIdx();
1005 unsigned Opc =
1006 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1007 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1008 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1009
1010 bool MergeForward = Flags.getMergeForward();
1011
1012 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1013 if (RenameReg) {
1014 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1015 DefinedInBB.addReg(*RenameReg);
1016
1017 // Return the sub/super register for RenameReg, matching the size of
1018 // OriginalReg.
1019 auto GetMatchingSubReg =
1020 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1021 for (MCPhysReg SubOrSuper :
1022 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1023 if (C->contains(SubOrSuper))
1024 return SubOrSuper;
1025 }
1026 llvm_unreachable("Should have found matching sub or super register!");
1027 };
1028
1029 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1030 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1031 bool IsDef) {
1032 if (IsDef) {
1033 bool SeenDef = false;
1034 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1035 MachineOperand &MOP = MI.getOperand(OpIdx);
1036 // Rename the first explicit definition and all implicit
1037 // definitions matching RegToRename.
1038 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1039 (!MergeForward || !SeenDef ||
1040 (MOP.isDef() && MOP.isImplicit())) &&
1041 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1042 assert((MOP.isImplicit() ||
1043 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1044 "Need renamable operands");
1045 Register MatchingReg;
1046 if (const TargetRegisterClass *RC =
1047 MI.getRegClassConstraint(OpIdx, TII, TRI))
1048 MatchingReg = GetMatchingSubReg(RC);
1049 else {
1050 if (!isRewritableImplicitDef(MI.getOpcode()))
1051 continue;
1052 MatchingReg = GetMatchingSubReg(
1053 TRI->getMinimalPhysRegClass(MOP.getReg()));
1054 }
1055 MOP.setReg(MatchingReg);
1056 SeenDef = true;
1057 }
1058 }
1059 } else {
1060 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1061 MachineOperand &MOP = MI.getOperand(OpIdx);
1062 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1063 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1064 assert((MOP.isImplicit() ||
1065 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1066 "Need renamable operands");
1067 Register MatchingReg;
1068 if (const TargetRegisterClass *RC =
1069 MI.getRegClassConstraint(OpIdx, TII, TRI))
1070 MatchingReg = GetMatchingSubReg(RC);
1071 else
1072 MatchingReg = GetMatchingSubReg(
1073 TRI->getMinimalPhysRegClass(MOP.getReg()));
1074 assert(MatchingReg != AArch64::NoRegister &&
1075 "Cannot find matching regs for renaming");
1076 MOP.setReg(MatchingReg);
1077 }
1078 }
1079 }
1080 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1081 return true;
1082 };
1083 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1084 TRI, UINT32_MAX, UpdateMIs);
1085
1086#if !defined(NDEBUG)
1087 // For forward merging store:
1088 // Make sure the register used for renaming is not used between the
1089 // paired instructions. That would trash the content before the new
1090 // paired instruction.
1091 MCPhysReg RegToCheck = *RenameReg;
1092 // For backward merging load:
1093 // Make sure the register being renamed is not used between the
1094 // paired instructions. That would trash the content after the new
1095 // paired instruction.
1096 if (!MergeForward)
1097 RegToCheck = RegToRename;
1098 for (auto &MI :
1099 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1100 MergeForward ? std::next(I) : I,
1101 MergeForward ? std::next(Paired) : Paired))
1102 assert(all_of(MI.operands(),
1103 [this, RegToCheck](const MachineOperand &MOP) {
1104 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1105 MOP.isUndef() ||
1106 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1107 }) &&
1108 "Rename register used between paired instruction, trashing the "
1109 "content");
1110#endif
1111 }
1112
1113 // Insert our new paired instruction after whichever of the paired
1114 // instructions MergeForward indicates.
1115 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1116 // Also based on MergeForward is from where we copy the base register operand
1117 // so we get the flags compatible with the input code.
1118 const MachineOperand &BaseRegOp =
1119 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1120 : AArch64InstrInfo::getLdStBaseOp(*I);
1121
1122 int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
1123 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1124 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1125 if (IsUnscaled != PairedIsUnscaled) {
1126 // We're trying to pair instructions that differ in how they are scaled. If
1127 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1128 // the opposite (i.e., make Paired's offset unscaled).
1129 int MemSize = TII->getMemScale(*Paired);
1130 if (PairedIsUnscaled) {
1131 // If the unscaled offset isn't a multiple of the MemSize, we can't
1132 // pair the operations together.
1133 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1134 "Offset should be a multiple of the stride!");
1135 PairedOffset /= MemSize;
1136 } else {
1137 PairedOffset *= MemSize;
1138 }
1139 }
1140
1141 // Which register is Rt and which is Rt2 depends on the offset order.
1142 // However, for pre load/stores the Rt should be the one of the pre
1143 // load/store.
1144 MachineInstr *RtMI, *Rt2MI;
1145 if (Offset == PairedOffset + OffsetStride &&
1146 !AArch64InstrInfo::isPreLdSt(*I)) {
1147 RtMI = &*Paired;
1148 Rt2MI = &*I;
1149 // Here we swapped the assumption made for SExtIdx.
1150 // I.e., we turn ldp I, Paired into ldp Paired, I.
1151 // Update the index accordingly.
1152 if (SExtIdx != -1)
1153 SExtIdx = (SExtIdx + 1) % 2;
1154 } else {
1155 RtMI = &*I;
1156 Rt2MI = &*Paired;
1157 }
1158 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1159 // Scale the immediate offset, if necessary.
1160 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1161 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1162 "Unscaled offset cannot be scaled.");
1163 OffsetImm /= TII->getMemScale(*RtMI);
1164 }
1165
1166 // Construct the new instruction.
1167 MachineInstrBuilder MIB;
1168 DebugLoc DL = I->getDebugLoc();
1169 MachineBasicBlock *MBB = I->getParent();
1170 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1171 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1172 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1173 // Kill flags may become invalid when moving stores for pairing.
1174 if (RegOp0.isUse()) {
1175 if (!MergeForward) {
1176 // Clear kill flags on store if moving upwards. Example:
1177 // STRWui kill %w0, ...
1178 // USE %w1
1179 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1180 // We are about to move the store of w1, so its kill flag may become
1181 // invalid; not the case for w0.
1182 // Since w1 is used between the stores, the kill flag on w1 is cleared
1183 // after merging.
1184 // STPWi kill %w0, %w1, ...
1185 // USE %w1
1186 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1187 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1188 PairedRegOp.setIsKill(false);
1189 } else {
1190 // Clear kill flags of the first stores register. Example:
1191 // STRWui %w1, ...
1192 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1193 // STRW %w0
1194 Register Reg = getLdStRegOp(*I).getReg();
1195 for (MachineInstr &MI :
1196 make_range(std::next(I->getIterator()), Paired->getIterator()))
1197 MI.clearRegisterKills(Reg, TRI);
1198 }
1199 }
1200
1201 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1202 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1203
1204 // Adds the pre-index operand for pre-indexed ld/st pairs.
1205 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1206 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1207
1208 MIB.add(RegOp0)
1209 .add(RegOp1)
1210 .add(BaseRegOp)
1211 .addImm(OffsetImm)
1212 .cloneMergedMemRefs({&*I, &*Paired})
1213 .setMIFlags(I->mergeFlagsWith(*Paired));
1214
1215 (void)MIB;
1216
1217 LLVM_DEBUG(
1218 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1219 LLVM_DEBUG(I->print(dbgs()));
1220 LLVM_DEBUG(dbgs() << " ");
1221 LLVM_DEBUG(Paired->print(dbgs()));
1222 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1223 if (SExtIdx != -1) {
1224 // Generate the sign extension for the proper result of the ldp.
1225 // I.e., with X1, that would be:
1226 // %w1 = KILL %w1, implicit-def %x1
1227 // %x1 = SBFMXri killed %x1, 0, 31
1228 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1229 // Right now, DstMO has the extended register, since it comes from an
1230 // extended opcode.
1231 Register DstRegX = DstMO.getReg();
1232 // Get the W variant of that register.
1233 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1234 // Update the result of LDP to use the W instead of the X variant.
1235 DstMO.setReg(DstRegW);
1236 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1237 LLVM_DEBUG(dbgs() << "\n");
1238 // Make the machine verifier happy by providing a definition for
1239 // the X register.
1240 // Insert this definition right after the generated LDP, i.e., before
1241 // InsertionPoint.
1242 MachineInstrBuilder MIBKill =
1243 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1244 .addReg(DstRegW)
1245 .addReg(DstRegX, RegState::Define);
1246 MIBKill->getOperand(2).setImplicit();
1247 // Create the sign extension.
1248 MachineInstrBuilder MIBSXTW =
1249 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1250 .addReg(DstRegX)
1251 .addImm(0)
1252 .addImm(31);
1253 (void)MIBSXTW;
1254
1255 // In the case of a sign-extend, where we have something like:
1256 // debugValueSubstitutions:[]
1257 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1258 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1259 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1260 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1261
1262 // It will be converted to:
1263 // debugValueSubstitutions:[]
1264 // $w0, $w1 = LDPWi $x0, 0
1265 // $w0 = KILL $w0, implicit-def $x0
1266 // $x0 = SBFMXri $x0, 0, 31
1267 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1268 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1269
1270 // We want the final result to look like:
1271 // debugValueSubstitutions:
1272 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1273 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1274 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1275 // $w0 = KILL $w0, implicit-def $x0
1276 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1277 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1278 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1279
1280 // $x0 is where one final value ends up, so the sign-extend (SBFMXri)
1281 // instruction holds the value we care about for it and is given a new
1282 // debug-instr-number 3. $w1 holds the other final value, so the LDP
1283 // instruction is likewise given a new
1284 // debug-instr-number 4. We have to add these substitutions to the
1285 // debugValueSubstitutions table. However, we also have to ensure that the
1286 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1287 // $w1 is the second operand of the LDP instruction.
1288
1289 if (I->peekDebugInstrNum()) {
1290 // If I is the instruction which got sign extended and has a
1291 // debug-instr-number, give the SBFMXri instruction a new
1292 // debug-instr-number, and update the debugValueSubstitutions table with
1293 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1294 // instruction a new debug-instr-number, and update the
1295 // debugValueSubstitutions table with the new debug-instr-number and
1296 // OpIndex pair.
1297 unsigned NewInstrNum;
1298 if (DstRegX == I->getOperand(0).getReg()) {
1299 NewInstrNum = MIBSXTW->getDebugInstrNum();
1300 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1301 *MIBSXTW);
1302 } else {
1303 NewInstrNum = MIB->getDebugInstrNum();
1304 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1305 }
1306 }
1307 if (Paired->peekDebugInstrNum()) {
1308 // If Paired is the instruction which got sign extended and has a
1309 // debug-instr-number, give the SBFMXri instruction a new
1310 // debug-instr-number, and update the debugValueSubstitutions table with
1311 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1312 // instruction a new debug-instr-number, and update the
1313 // debugValueSubstitutions table with the new debug-instr-number and
1314 // OpIndex pair.
1315 unsigned NewInstrNum;
1316 if (DstRegX == Paired->getOperand(0).getReg()) {
1317 NewInstrNum = MIBSXTW->getDebugInstrNum();
1318 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1319 *MIBSXTW);
1320 } else {
1321 NewInstrNum = MIB->getDebugInstrNum();
1322 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1323 *MIB);
1324 }
1325 }
1326
1327 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1328 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1329 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1330 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1331 // variant of the registers.
1332 MachineOperand &MOp0 = MIB->getOperand(0);
1333 MachineOperand &MOp1 = MIB->getOperand(1);
1334 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1335 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1336 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1337 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1338 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1339 } else {
1340
1341 // In the case that the merge doesn't result in a sign-extend, if we have
1342 // something like:
1343 // debugValueSubstitutions:[]
1344 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1345 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1346 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1347 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1348
1349 // It will be converted to:
1350 // debugValueSubstitutions: []
1351 // $x0, $x1 = LDPXi $x0, 0
1352 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1353 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1354
1355 // We want the final result to look like:
1356 // debugValueSubstitutions:
1357 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1358 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1359 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1360 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1361 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1362
1363 // Here all that needs to be done is, that the LDP instruction needs to be
1364 // updated with a new debug-instr-number, we then need to add entries into
1365 // the debugSubstitutions table to map the old instr-refs to the new ones.
1366
1367 // Assign new DebugInstrNum to the Paired instruction.
1368 if (I->peekDebugInstrNum()) {
1369 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1370 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1371 *MIB);
1372 }
1373 if (Paired->peekDebugInstrNum()) {
1374 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1375 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1376 *MIB);
1377 }
1378
1379 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1380 }
1381 LLVM_DEBUG(dbgs() << "\n");
1382
1383 if (MergeForward)
1384 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1385 if (MOP.isReg() && MOP.isKill())
1386 DefinedInBB.addReg(MOP.getReg());
1387
1388 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1389 // only copies implicit defs and makes sure that each operand is only added
1390 // once in case of duplicates.
1391 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1392 MachineBasicBlock::iterator MI2) {
1393 SmallSetVector<Register, 4> Ops;
1394 for (const MachineOperand &MO :
1395 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1396 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1397 Ops.insert(MO.getReg());
1398 for (const MachineOperand &MO :
1399 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1400 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1401 Ops.insert(MO.getReg());
1402 for (auto Op : Ops)
1403 MIB.addDef(Op, RegState::Implicit);
1404 };
1405 CopyImplicitOps(I, Paired);
1406
1407 // Erase the old instructions.
1408 I->eraseFromParent();
1409 Paired->eraseFromParent();
1410
1411 return NextI;
1412}
1413
1414MachineBasicBlock::iterator
1415AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1416 MachineBasicBlock::iterator StoreI) {
1417 MachineBasicBlock::iterator NextI =
1418 next_nodbg(LoadI, LoadI->getParent()->end());
1419
1420 int LoadSize = TII->getMemScale(*LoadI);
1421 int StoreSize = TII->getMemScale(*StoreI);
1422 Register LdRt = getLdStRegOp(*LoadI).getReg();
1423 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1424 Register StRt = getLdStRegOp(*StoreI).getReg();
1425 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1426
1427 assert((IsStoreXReg ||
1428 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1429 "Unexpected RegClass");
1430
1431 MachineInstr *BitExtMI;
1432 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1433 // Remove the load if the destination register of the load is the same
1434 // register as the stored value.
1435 if (StRt == LdRt && LoadSize == 8) {
1436 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1437 LoadI->getIterator())) {
1438 if (MI.killsRegister(StRt, TRI)) {
1439 MI.clearRegisterKills(StRt, TRI);
1440 break;
1441 }
1442 }
1443 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1444 LLVM_DEBUG(LoadI->print(dbgs()));
1445 LLVM_DEBUG(dbgs() << "\n");
1446 LoadI->eraseFromParent();
1447 return NextI;
1448 }
1449 // Replace the load with a mov if the load and store are of the same size.
1450 BitExtMI =
1451 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1452 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1453 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1454 .add(StMO)
1455 .addImm(0)
1456 .setMIFlags(LoadI->getFlags());
1457 } else {
1458 // FIXME: Currently we disable this transformation in big-endian targets as
1459 // performance and correctness are verified only in little-endian.
1460 if (!Subtarget->isLittleEndian())
1461 return NextI;
1462 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1463 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1464 "Unsupported ld/st match");
1465 assert(LoadSize <= StoreSize && "Invalid load size");
1466 int UnscaledLdOffset =
1467 IsUnscaled
1468 ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
1469 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1470 int UnscaledStOffset =
1471 IsUnscaled
1472 ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
1473 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1474 int Width = LoadSize * 8;
1475 Register DestReg =
1476 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1477 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1478 : LdRt;
1479
1480 assert((UnscaledLdOffset >= UnscaledStOffset &&
1481 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1482 "Invalid offset");
1483
1484 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1485 int Imms = Immr + Width - 1;
1486 if (UnscaledLdOffset == UnscaledStOffset) {
1487 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1488 | ((Immr) << 6) // immr
1489 | ((Imms) << 0) // imms
1490 ;
1491
1492 BitExtMI =
1493 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1494 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1495 DestReg)
1496 .add(StMO)
1497 .addImm(AndMaskEncoded)
1498 .setMIFlags(LoadI->getFlags());
1499 } else if (IsStoreXReg && Imms == 31) {
1500 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1501 // instruction.
1502 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1503 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1504 TII->get(AArch64::UBFMWri),
1505 TRI->getSubReg(DestReg, AArch64::sub_32))
1506 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1507 .addImm(Immr)
1508 .addImm(Imms)
1509 .setMIFlags(LoadI->getFlags());
1510 } else {
1511 BitExtMI =
1512 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1513 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1514 DestReg)
1515 .add(StMO)
1516 .addImm(Immr)
1517 .addImm(Imms)
1518 .setMIFlags(LoadI->getFlags());
1519 }
1520 }
1521
1522 // Clear kill flags between store and load.
1523 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1524 BitExtMI->getIterator()))
1525 if (MI.killsRegister(StRt, TRI)) {
1526 MI.clearRegisterKills(StRt, TRI);
1527 break;
1528 }
1529
1530 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1531 LLVM_DEBUG(StoreI->print(dbgs()));
1532 LLVM_DEBUG(dbgs() << " ");
1533 LLVM_DEBUG(LoadI->print(dbgs()));
1534 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1535 LLVM_DEBUG(StoreI->print(dbgs()));
1536 LLVM_DEBUG(dbgs() << " ");
1537 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1538 LLVM_DEBUG(dbgs() << "\n");
1539
1540 // Erase the old instructions.
1541 LoadI->eraseFromParent();
1542 return NextI;
1543}
1544
1545static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1546 // Convert the byte-offset used by unscaled into an "element" offset used
1547 // by the scaled pair load/store instructions.
1548 if (IsUnscaled) {
1549 // If the byte-offset isn't a multiple of the stride, there's no point
1550 // trying to match it.
1551 if (Offset % OffsetStride)
1552 return false;
1553 Offset /= OffsetStride;
1554 }
1555 return Offset <= 63 && Offset >= -64;
1556}
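// Worked example (illustrative, not from the original source): paired opcodes
// encode the offset as a signed 7-bit multiple of the element size, hence the
// [-64, 63] range above. For X-register pairs (stride 8) that corresponds to
// byte offsets -512..504, and an unscaled byte offset must also be an exact
// multiple of 8 to qualify.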
1557
1558// Do alignment, specialized to power of 2 and for signed ints,
1559// avoiding having to do a C-style cast from uint64_t to int when
1560// using alignTo from include/llvm/Support/MathExtras.h.
1561// FIXME: Move this function to include/MathExtras.h?
1562static int alignTo(int Num, int PowOf2) {
1563 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1564}
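// Illustrative check (not from the original source): alignTo(5, 4) yields
// (5 + 3) & ~3 == 8, alignTo(8, 4) stays 8, and alignTo(-3, 4) yields 0, so
// values are rounded up to a multiple of PowOf2 (towards +infinity).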
1565
1566static bool mayAlias(MachineInstr &MIa,
1567 SmallVectorImpl<MachineInstr *> &MemInsns,
1568 AliasAnalysis *AA) {
1569 for (MachineInstr *MIb : MemInsns) {
1570 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1571 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1572 return true;
1573 }
1574 }
1575
1576 LLVM_DEBUG(dbgs() << "No aliases found\n");
1577 return false;
1578}
1579
1580bool AArch64LoadStoreOpt::findMatchingStore(
1581 MachineBasicBlock::iterator I, unsigned Limit,
1582 MachineBasicBlock::iterator &StoreI) {
1583 MachineBasicBlock::iterator B = I->getParent()->begin();
1584 MachineBasicBlock::iterator MBBI = I;
1585 MachineInstr &LoadMI = *I;
1586 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
1587
1588 // If the load is the first instruction in the block, there's obviously
1589 // not any matching store.
1590 if (MBBI == B)
1591 return false;
1592
1593 // Track which register units have been modified and used between the first
1594 // insn and the second insn.
1595 ModifiedRegUnits.clear();
1596 UsedRegUnits.clear();
1597
1598 unsigned Count = 0;
1599 do {
1600 MBBI = prev_nodbg(MBBI, B);
1601 MachineInstr &MI = *MBBI;
1602
1603 // Don't count transient instructions towards the search limit since there
1604 // may be different numbers of them if e.g. debug information is present.
1605 if (!MI.isTransient())
1606 ++Count;
1607
1608 // If the load instruction reads directly from the address to which the
1609 // store instruction writes and the stored value is not modified, we can
1610 // promote the load. Since we do not handle stores with pre-/post-index,
1611 // it's unnecessary to check if BaseReg is modified by the store itself.
1612 // Also we can't handle stores without an immediate offset operand,
1613 // while the operand might be the address for a global variable.
1614 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1615 BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
1616 AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
1617 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1618 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1619 StoreI = MBBI;
1620 return true;
1621 }
1622
1623 if (MI.isCall())
1624 return false;
1625
1626 // Update modified / uses register units.
1627 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1628
1629 // Otherwise, if the base register is modified, we have no match, so
1630 // return early.
1631 if (!ModifiedRegUnits.available(BaseReg))
1632 return false;
1633
1634 // If we encounter a store aliased with the load, return early.
1635 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1636 return false;
1637 } while (MBBI != B && Count < Limit);
1638 return false;
1639}
1640
1641static bool needsWinCFI(const MachineFunction *MF) {
1642 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1643 MF->getFunction().needsUnwindTableEntry();
1644}
1645
1646// Returns true if FirstMI and MI are candidates for merging or pairing.
1647// Otherwise, returns false.
1648static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1649 LdStPairFlags &Flags,
1650 const AArch64InstrInfo *TII) {
1651 // If this is volatile or if pairing is suppressed, not a candidate.
1652 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1653 return false;
1654
1655 // We should have already checked FirstMI for pair suppression and volatility.
1656 assert(!FirstMI.hasOrderedMemoryRef() &&
1657 !TII->isLdStPairSuppressed(FirstMI) &&
1658 "FirstMI shouldn't get here if either of these checks are true.");
1659
1660 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1661 MI.getFlag(MachineInstr::FrameDestroy)))
1662 return false;
1663
1664 unsigned OpcA = FirstMI.getOpcode();
1665 unsigned OpcB = MI.getOpcode();
1666
1667 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1668 if (OpcA == OpcB)
1669 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1670
1671 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1672 // allow pairing them with other instructions.
1673 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1674 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1675 return false;
1676
1677 // Two pre ld/st of different opcodes cannot be merged either
1678 if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
1679 return false;
1680
1681 // Try to match a sign-extended load/store with a zero-extended load/store.
1682 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1683 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1684 assert(IsValidLdStrOpc &&
1685 "Given Opc should be a Load or Store with an immediate");
1686 // OpcA will be the first instruction in the pair.
1687 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1688 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1689 return true;
1690 }
1691
1692 // If the second instruction isn't even a mergable/pairable load/store, bail
1693 // out.
1694 if (!PairIsValidLdStrOpc)
1695 return false;
1696
1697 // Narrow stores do not have a matching pair opcode, so constrain their
1698 // merging to zero stores.
1699 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1700 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1701 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1702 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1703
1704 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1705 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1706 // are candidate pairs that can be merged.
1707 if (isPreLdStPairCandidate(FirstMI, MI))
1708 return true;
1709
1710 // Try to match an unscaled load/store with a scaled load/store.
1711 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1712 getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1713
1714 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1715}
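// Editorial example (not part of the original source) for the final
// unscaled/scaled check above, assuming both opcodes map to the same pair
// opcode (LDPXi):
//   ldr  x0, [x2, #8]    ; LDRXui, scaled immediate
//   ldur x1, [x2, #16]   ; LDURXi, unscaled immediate
// hasUnscaledLdStOffset() differs between the two opcodes, so they remain
// candidates; whether the offsets are actually adjacent is checked later in
// findMatchingInsn().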
1716
1717static bool canRenameMOP(const MachineOperand &MOP,
1718 const TargetRegisterInfo *TRI) {
1719 if (MOP.isReg()) {
1720 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1721 // Renaming registers with multiple disjunct sub-registers (e.g. the
1722 // result of a LD3) means that all sub-registers are renamed, potentially
1723 // impacting other instructions we did not check. Bail out.
1724 // Note that this relies on the structure of the AArch64 register file. In
1725 // particular, a subregister cannot be written without overwriting the
1726 // whole register.
1727 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1728 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1729 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1730 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1731 LLVM_DEBUG(
1732 dbgs()
1733 << " Cannot rename operands with multiple disjunct subregisters ("
1734 << MOP << ")\n");
1735 return false;
1736 }
1737
1738 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1739 // them must be known. For example, in ORRWrs the implicit-def
1740 // corresponds to the result register.
1741 if (MOP.isImplicit() && MOP.isDef()) {
1742 if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
1743 return false;
1744 return TRI->isSuperOrSubRegisterEq(
1745 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1746 }
1747 }
1748 return MOP.isImplicit() ||
1749 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1750}
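// Editorial note (not part of the original source): the implicit-def rule
// above covers MIR such as
//   $w8 = ORRWrs $wzr, $w9, 0, implicit-def $x8
// where the implicit $x8 def is simply the 64-bit super-register of the $w8
// result, so renaming the result (say to $w10) can consistently rewrite the
// implicit def to $x10 as well.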
1751
1752static bool
1753canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1754 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1755 const TargetRegisterInfo *TRI) {
1756 if (!FirstMI.mayStore())
1757 return false;
1758
1759 // Check if we can find an unused register which we can use to rename
1760 // the register used by the first load/store.
1761
1762 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1763 // For now, we only rename if the store operand gets killed at the store.
1764 if (!getLdStRegOp(FirstMI).isKill() &&
1765 !any_of(FirstMI.operands(),
1766 [TRI, RegToRename](const MachineOperand &MOP) {
1767 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1768 MOP.isImplicit() && MOP.isKill() &&
1769 TRI->regsOverlap(RegToRename, MOP.getReg());
1770 })) {
1771 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1772 return false;
1773 }
1774
1775 bool FoundDef = false;
1776
1777 // For each instruction between FirstMI and the previous def for RegToRename,
1778 // we
1779 // * check if we can rename RegToRename in this instruction
1780 // * collect the registers used and required register classes for RegToRename.
1781 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1782 bool IsDef) {
1783 LLVM_DEBUG(dbgs() << "Checking " << MI);
1784 // Currently we do not try to rename across frame-setup instructions.
1785 if (MI.getFlag(MachineInstr::FrameSetup)) {
1786 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1787 << "currently\n");
1788 return false;
1789 }
1790
1791 UsedInBetween.accumulate(MI);
1792
1793 // For a definition, check that we can rename the definition and exit the
1794 // loop.
1795 FoundDef = IsDef;
1796
1797 // For defs, check if we can rename the first def of RegToRename.
1798 if (FoundDef) {
1799 // For some pseudo instructions, we might not generate code in the end
1800 // (e.g. KILL) and we would end up without a correct def for the rename
1801 // register.
1802 // TODO: This might be overly conservative and we could handle those cases
1803 // in multiple ways:
1804 // 1. Insert an extra copy, to materialize the def.
1805 // 2. Skip pseudo-defs until we find a non-pseudo def.
1806 if (MI.isPseudo()) {
1807 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1808 return false;
1809 }
1810
1811 for (auto &MOP : MI.operands()) {
1812 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1813 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1814 continue;
1815 if (!canRenameMOP(MOP, TRI)) {
1816 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1817 return false;
1818 }
1819 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1820 }
1821 return true;
1822 } else {
1823 for (auto &MOP : MI.operands()) {
1824 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1825 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1826 continue;
1827
1828 if (!canRenameMOP(MOP, TRI)) {
1829 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1830 return false;
1831 }
1832 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1833 }
1834 }
1835 return true;
1836 };
1837
1838 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1839 return false;
1840
1841 if (!FoundDef) {
1842 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1843 return false;
1844 }
1845 return true;
1846}
1847
1848// We want to merge the second load into the first by rewriting the usages of
1849// the same reg between first (incl.) and second (excl.). We don't need to care
1850// about any insns before FirstLoad or after SecondLoad.
1851// 1. The second load writes a new value into the same reg.
1852//    - The renaming cannot impact later uses of the reg.
1853//    - The second load always trashes the value written by the first load,
1854//      which means the reg must be killed before the second load.
1855// 2. The first load must be a def for the same reg so we don't need to look
1856// into anything before it.
1857static bool canRenameUntilSecondLoad(
1858 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1859 LiveRegUnits &UsedInBetween,
1860 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1861 const TargetRegisterInfo *TRI) {
1862 if (FirstLoad.isPseudo())
1863 return false;
1864
1865 UsedInBetween.accumulate(FirstLoad);
1866 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
1867 bool Success = std::all_of(
1868 FirstLoad.getIterator(), SecondLoad.getIterator(),
1869 [&](MachineInstr &MI) {
1870 LLVM_DEBUG(dbgs() << "Checking " << MI);
1871 // Currently we do not try to rename across frame-setup instructions.
1872 if (MI.getFlag(MachineInstr::FrameSetup)) {
1873 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1874 << "currently\n");
1875 return false;
1876 }
1877
1878 for (auto &MOP : MI.operands()) {
1879 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1880 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1881 continue;
1882 if (!canRenameMOP(MOP, TRI)) {
1883 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1884 return false;
1885 }
1886 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1887 }
1888
1889 return true;
1890 });
1891 return Success;
1892}
1893
1894// Check if we can find a physical register for renaming \p Reg. This register
1895// must:
1896// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1897// defined registers up to the point where the renamed register will be used,
1898// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
1899// registers in the range in which the rename register will be used,
1900// * be available in all required register classes (checked using RequiredClasses).
1901static std::optional<MCPhysReg> tryToFindRegisterToRename(
1902 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1903 LiveRegUnits &UsedInBetween,
1904 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1905 const TargetRegisterInfo *TRI) {
1906 const MachineRegisterInfo &RegInfo = MF.getRegInfo();
1907
1908 // Checks if any sub- or super-register of PR is callee saved.
1909 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1910 return any_of(TRI->sub_and_superregs_inclusive(PR),
1911 [&MF, TRI](MCPhysReg SubOrSuper) {
1912 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1913 });
1914 };
1915
1916 // Check if PR or one of its sub- or super-registers can be used for all
1917 // required register classes.
1918 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1919 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1920 return any_of(
1921 TRI->sub_and_superregs_inclusive(PR),
1922 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1923 });
1924 };
1925
1926 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1927 for (const MCPhysReg &PR : *RegClass) {
1928 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1929 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1930 CanBeUsedForAllClasses(PR)) {
1931 DefinedInBB.addReg(PR);
1932 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1933 << "\n");
1934 return {PR};
1935 }
1936 }
1937 LLVM_DEBUG(dbgs() << "No rename register found from "
1938 << TRI->getRegClassName(RegClass) << "\n");
1939 return std::nullopt;
1940}
1941
1942// For store pairs: returns a register from FirstMI to the beginning of the
1943// block that can be renamed.
1944// For load pairs: returns a register from FirstMI to MI that can be renamed.
1945static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1946 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1947 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1948 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1949 const TargetRegisterInfo *TRI) {
1950 std::optional<MCPhysReg> RenameReg;
1951 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1952 return RenameReg;
1953
1954 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1955 MachineFunction &MF = *FirstMI.getParent()->getParent();
1956 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1957 return RenameReg;
1958
1959 const bool IsLoad = FirstMI.mayLoad();
1960
1961 if (!MaybeCanRename) {
1962 if (IsLoad)
1963 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1964 RequiredClasses, TRI)};
1965 else
1966 MaybeCanRename = {
1967 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1968 }
1969
1970 if (*MaybeCanRename) {
1971 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1972 RequiredClasses, TRI);
1973 }
1974 return RenameReg;
1975}
1976
1977/// Scan the instructions looking for a load/store that can be combined with the
1978/// current instruction into a wider equivalent or a load/store pair.
1979MachineBasicBlock::iterator
1980AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1981 LdStPairFlags &Flags, unsigned Limit,
1982 bool FindNarrowMerge) {
1983 MachineBasicBlock::iterator E = I->getParent()->end();
1984 MachineBasicBlock::iterator MBBI = I;
1985 MachineBasicBlock::iterator MBBIWithRenameReg;
1986 MachineInstr &FirstMI = *I;
1987 MBBI = next_nodbg(MBBI, E);
1988
1989 bool MayLoad = FirstMI.mayLoad();
1990 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1991 Register Reg = getLdStRegOp(FirstMI).getReg();
1992 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1993 int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
1994 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1995 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1996
1997 std::optional<bool> MaybeCanRename;
1998 if (!EnableRenaming)
1999 MaybeCanRename = {false};
2000
2001 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2002 LiveRegUnits UsedInBetween;
2003 UsedInBetween.init(*TRI);
2004
2005 Flags.clearRenameReg();
2006
2007 // Track which register units have been modified and used between the first
2008 // insn (inclusive) and the second insn.
2009 ModifiedRegUnits.clear();
2010 UsedRegUnits.clear();
2011
2012 // Remember any instructions that read/write memory between FirstMI and MI.
2013 SmallVector<MachineInstr *, 4> MemInsns;
2014
2015 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2016 for (unsigned Count = 0; MBBI != E && Count < Limit;
2017 MBBI = next_nodbg(MBBI, E)) {
2018 MachineInstr &MI = *MBBI;
2019 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2020
2021 UsedInBetween.accumulate(MI);
2022
2023 // Don't count transient instructions towards the search limit since there
2024 // may be different numbers of them if e.g. debug information is present.
2025 if (!MI.isTransient())
2026 ++Count;
2027
2028 Flags.setSExtIdx(-1);
2029 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2030 AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
2031 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2032 // If we've found another instruction with the same opcode, check to see
2033 // if the base and offset are compatible with our starting instruction.
2034 // These instructions all have scaled immediate operands, so we just
2035 // check for +1/-1. Make sure to check the new instruction offset is
2036 // actually an immediate and not a symbolic reference destined for
2037 // a relocation.
2038 Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
2039 int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2040 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2041 if (IsUnscaled != MIIsUnscaled) {
2042 // We're trying to pair instructions that differ in how they are scaled.
2043 // If FirstMI is scaled then scale the offset of MI accordingly.
2044 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2045 int MemSize = TII->getMemScale(MI);
2046 if (MIIsUnscaled) {
2047 // If the unscaled offset isn't a multiple of the MemSize, we can't
2048 // pair the operations together: bail and keep looking.
2049 if (MIOffset % MemSize) {
2050 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2051 UsedRegUnits, TRI);
2052 MemInsns.push_back(&MI);
2053 continue;
2054 }
2055 MIOffset /= MemSize;
2056 } else {
2057 MIOffset *= MemSize;
2058 }
2059 }
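// Editorial example (not part of the original source) of the normalization
// just above: if FirstMI is scaled (e.g. LDRXui with Offset == 1, i.e. byte
// offset 8) and MI is unscaled (e.g. LDURXi with MIOffset == 16), then
// MemSize == 8 and MIOffset becomes 16 / 8 == 2, so both offsets are in
// 8-byte units and the +1/-1 adjacency check below applies directly.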
2060
2061 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2062
2063 if (BaseReg == MIBaseReg) {
2064 // If the offset of the second ld/st is not equal to the size of the
2065 // destination register it can't be paired with a pre-index ld/st
2066 // pair. Additionally if the base reg is used or modified the operations
2067 // can't be paired: bail and keep looking.
2068 if (IsPreLdSt) {
2069 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2070 bool IsBaseRegUsed = !UsedRegUnits.available(
2071 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2072 bool IsBaseRegModified = !ModifiedRegUnits.available(
2073 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2074 // If the stored value and the address of the second instruction are
2075 // the same, it needs to be using the updated register and therefore
2076 // it must not be folded.
2077 bool IsMIRegTheSame =
2078 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2079 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2080 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2081 IsMIRegTheSame) {
2082 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2083 UsedRegUnits, TRI);
2084 MemInsns.push_back(&MI);
2085 continue;
2086 }
2087 } else {
2088 if ((Offset != MIOffset + OffsetStride) &&
2089 (Offset + OffsetStride != MIOffset)) {
2090 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2091 UsedRegUnits, TRI);
2092 MemInsns.push_back(&MI);
2093 continue;
2094 }
2095 }
2096
2097 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2098 if (FindNarrowMerge) {
2099 // If the alignment requirements of the scaled wide load/store
2100 // instruction can't express the offset of the scaled narrow input,
2101 // bail and keep looking. For promotable zero stores, allow only when
2102 // the stored value is the same (i.e., WZR).
2103 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2104 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2105 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2106 UsedRegUnits, TRI);
2107 MemInsns.push_back(&MI);
2108 continue;
2109 }
2110 } else {
2111 // Pairwise instructions have a 7-bit signed offset field. Single
2112 // insns have a 12-bit unsigned offset field. If the resultant
2113 // immediate offset of merging these instructions is out of range for
2114 // a pairwise instruction, bail and keep looking.
2115 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2116 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2117 UsedRegUnits, TRI);
2118 MemInsns.push_back(&MI);
2119 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2120 << "keep looking.\n");
2121 continue;
2122 }
2123 // If the alignment requirements of the paired (scaled) instruction
2124 // can't express the offset of the unscaled input, bail and keep
2125 // looking.
2126 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2127 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2128 UsedRegUnits, TRI);
2129 MemInsns.push_back(&MI);
2131 << "Offset doesn't fit due to alignment requirements, "
2132 << "keep looking.\n");
2133 continue;
2134 }
2135 }
2136
2137 // If the BaseReg has been modified, then we cannot do the optimization.
2138 // For example, in the following pattern
2139 // ldr x1, [x2]
2140 // ldr x2, [x3]
2141 // ldr x4, [x2, #8],
2142 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2143 if (!ModifiedRegUnits.available(BaseReg))
2144 return E;
2145
2146 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2147 Reg, getLdStRegOp(MI).getReg());
2148
2149 // If the Rt of the second instruction (destination register of the
2150 // load) was not modified or used between the two instructions and none
2151 // of the instructions between the second and first alias with the
2152 // second, we can combine the second into the first.
2153 bool RtNotModified =
2154 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2155 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2156 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2157
2158 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2159 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2160 << (RtNotModified ? "true" : "false") << "\n"
2161 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2162 << (RtNotUsed ? "true" : "false") << "\n");
2163
2164 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2165 // For pairs loading into the same reg, try to find a renaming
2166 // opportunity to allow the renaming of Reg between FirstMI and MI
2167 // and combine MI into FirstMI; otherwise bail and keep looking.
2168 if (SameLoadReg) {
2169 std::optional<MCPhysReg> RenameReg =
2170 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2171 Reg, DefinedInBB, UsedInBetween,
2172 RequiredClasses, TRI);
2173 if (!RenameReg) {
2174 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2175 UsedRegUnits, TRI);
2176 MemInsns.push_back(&MI);
2177 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2178 << "keep looking.\n");
2179 continue;
2180 }
2181 Flags.setRenameReg(*RenameReg);
2182 }
2183
2184 Flags.setMergeForward(false);
2185 if (!SameLoadReg)
2186 Flags.clearRenameReg();
2187 return MBBI;
2188 }
2189
2190 // Likewise, if the Rt of the first instruction is not modified or used
2191 // between the two instructions and none of the instructions between the
2192 // first and the second alias with the first, we can combine the first
2193 // into the second.
2194 RtNotModified = !(
2195 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2196
2197 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2198 << "Reg '" << getLdStRegOp(FirstMI)
2199 << "' not modified: "
2200 << (RtNotModified ? "true" : "false") << "\n");
2201
2202 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2203 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2204 Flags.setMergeForward(true);
2205 Flags.clearRenameReg();
2206 return MBBI;
2207 }
2208
2209 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2210 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2211 RequiredClasses, TRI);
2212 if (RenameReg) {
2213 Flags.setMergeForward(true);
2214 Flags.setRenameReg(*RenameReg);
2215 MBBIWithRenameReg = MBBI;
2216 }
2217 }
2218 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2219 << "interference in between, keep looking.\n");
2220 }
2221 }
2222
2223 if (Flags.getRenameReg())
2224 return MBBIWithRenameReg;
2225
2226 // The instruction wasn't a matching load or store. Stop searching if we
2227 // encounter a call instruction that might modify memory.
2228 if (MI.isCall()) {
2229 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2230 return E;
2231 }
2232
2233 // Update modified / uses register units.
2234 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2235
2236 // Otherwise, if the base register is modified, we have no match, so
2237 // return early.
2238 if (!ModifiedRegUnits.available(BaseReg)) {
2239 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2240 return E;
2241 }
2242
2243 // Update list of instructions that read/write memory.
2244 if (MI.mayLoadOrStore())
2245 MemInsns.push_back(&MI);
2246 }
2247 return E;
2248}
2249
2250static MachineBasicBlock::iterator
2251maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
2252 assert((MI.getOpcode() == AArch64::SUBXri ||
2253 MI.getOpcode() == AArch64::ADDXri) &&
2254 "Expected a register update instruction");
2255 auto End = MI.getParent()->end();
2256 if (MaybeCFI == End ||
2257 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2258 !(MI.getFlag(MachineInstr::FrameSetup) ||
2259 MI.getFlag(MachineInstr::FrameDestroy)) ||
2260 MI.getOperand(0).getReg() != AArch64::SP)
2261 return End;
2262
2263 const MachineFunction &MF = *MI.getParent()->getParent();
2264 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2265 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2266 switch (CFI.getOperation()) {
2267 case MCCFIInstruction::OpDefCfa:
2268 case MCCFIInstruction::OpDefCfaOffset:
2269 return MaybeCFI;
2270 default:
2271 return End;
2272 }
2273}
2274
2275std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2276 MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
2277 bool IsForward, bool IsPreIdx, bool MergeEither) {
2278 assert((Update->getOpcode() == AArch64::ADDXri ||
2279 Update->getOpcode() == AArch64::SUBXri) &&
2280 "Unexpected base register update instruction to merge!");
2281 MachineBasicBlock::iterator E = I->getParent()->end();
2282 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2283
2284 // If updating the SP and the following instruction is a CFA-offset-related
2285 // CFI, make sure the CFI follows the SP update, either by merging at the
2286 // location of the update or by moving the CFI after the merged instruction.
2287 // If unable to do so, bail.
2288 MachineBasicBlock::iterator InsertPt = I;
2289 if (IsForward) {
2290 assert(IsPreIdx);
2291 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2292 if (MergeEither) {
2293 InsertPt = Update;
2294 } else {
2295 // Take care not to reorder CFIs.
2296 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2297 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2298 }))
2299 return std::nullopt;
2300
2301 MachineBasicBlock *MBB = InsertPt->getParent();
2302 MBB->splice(std::next(InsertPt), MBB, CFI);
2303 }
2304 }
2305 }
2306
2307 // Return the instruction following the merged instruction, which is
2308 // the instruction following our unmerged load. Unless that's the add/sub
2309 // instruction we're merging, in which case it's the one after that.
2310 if (NextI == Update)
2311 NextI = next_nodbg(NextI, E);
2312
2313 int Value = Update->getOperand(2).getImm();
2314 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2315 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2316 if (Update->getOpcode() == AArch64::SUBXri)
2317 Value = -Value;
2318
2319 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2320 : getPostIndexedOpcode(I->getOpcode());
2321 MachineInstrBuilder MIB;
2322 int Scale, MinOffset, MaxOffset;
2323 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2324 if (!AArch64InstrInfo::isPairedLdSt(*I)) {
2325 // Non-paired instruction.
2326 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2327 TII->get(NewOpc))
2328 .add(Update->getOperand(0))
2329 .add(getLdStRegOp(*I))
2330 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2331 .addImm(Value / Scale)
2332 .setMemRefs(I->memoperands())
2333 .setMIFlags(I->mergeFlagsWith(*Update));
2334 } else {
2335 // Paired instruction.
2336 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2337 TII->get(NewOpc))
2338 .add(Update->getOperand(0))
2339 .add(getLdStRegOp(*I, 0))
2340 .add(getLdStRegOp(*I, 1))
2341 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2342 .addImm(Value / Scale)
2343 .setMemRefs(I->memoperands())
2344 .setMIFlags(I->mergeFlagsWith(*Update));
2345 }
2346
2347 if (IsPreIdx) {
2348 ++NumPreFolded;
2349 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2350 } else {
2351 ++NumPostFolded;
2352 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2353 }
2354 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2355 LLVM_DEBUG(I->print(dbgs()));
2356 LLVM_DEBUG(dbgs() << " ");
2357 LLVM_DEBUG(Update->print(dbgs()));
2358 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2359 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2360 LLVM_DEBUG(dbgs() << "\n");
2361
2362 // Erase the old instructions for the block.
2363 I->eraseFromParent();
2364 Update->eraseFromParent();
2365
2366 return NextI;
2367}
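// Editorial example (not part of the original source) of the paired path in
// mergeUpdateInsn above, assuming the paired form scales its immediate by the
// access size (Scale == 8 for X registers):
//   ldp x0, x1, [sp]
//   add sp, sp, #64
// folds into the post-indexed form
//   ldp x0, x1, [sp], #64
// where the immediate operand added by the builder is Value / Scale == 8.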
2368
2369MachineBasicBlock::iterator
2370AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2371 MachineBasicBlock::iterator Update,
2372 unsigned Offset, int Scale) {
2373 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2374 "Unexpected const mov instruction to merge!");
2375 MachineBasicBlock::iterator E = I->getParent()->end();
2376 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2377 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2378 MachineInstr &MemMI = *I;
2379 unsigned Mask = (1 << 12) * Scale - 1;
2380 unsigned Low = Offset & Mask;
2381 unsigned High = Offset - Low;
2382 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2383 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2384 MachineInstrBuilder AddMIB, MemMIB;
2385
2386 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2387 AddMIB =
2388 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2389 .addDef(IndexReg)
2390 .addUse(BaseReg)
2391 .addImm(High >> 12) // shifted value
2392 .addImm(12); // shift 12
2393 (void)AddMIB;
2394 // Ld/St DestReg, IndexReg, Imm12
2395 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2396 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2397 .add(getLdStRegOp(MemMI))
2398 .addReg(IndexReg)
2399 .addImm(Low / Scale)
2400 .setMemRefs(I->memoperands())
2401 .setMIFlags(I->mergeFlagsWith(*Update));
2402 (void)MemMIB;
2403
2404 ++NumConstOffsetFolded;
2405 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2406 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2407 LLVM_DEBUG(PrevI->print(dbgs()));
2408 LLVM_DEBUG(dbgs() << " ");
2409 LLVM_DEBUG(Update->print(dbgs()));
2410 LLVM_DEBUG(dbgs() << " ");
2411 LLVM_DEBUG(I->print(dbgs()));
2412 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2413 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2414 LLVM_DEBUG(dbgs() << " ");
2415 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2416 LLVM_DEBUG(dbgs() << "\n");
2417
2418 // Erase the old instructions for the block.
2419 I->eraseFromParent();
2420 PrevI->eraseFromParent();
2421 Update->eraseFromParent();
2422
2423 return NextI;
2424}
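// Editorial example (not part of the original source) of the High/Low split
// in mergeConstOffsetInsn above, assuming an 8-byte access (Scale == 8, so
// Mask == (1 << 12) * 8 - 1 == 0x7fff):
//   Offset == 0x12340
//   Low    == Offset & Mask == 0x2340
//   High   == Offset - Low  == 0x10000
// The ADDXri is built with immediate High >> 12 == 0x10 (shifted by 12), and
// the rewritten load's immediate operand is Low / Scale == 0x468.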
2425
2426bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2427 MachineInstr &MI,
2428 unsigned BaseReg, int Offset) {
2429 switch (MI.getOpcode()) {
2430 default:
2431 break;
2432 case AArch64::SUBXri:
2433 case AArch64::ADDXri:
2434 // Make sure it's a vanilla immediate operand, not a relocation or
2435 // anything else we can't handle.
2436 if (!MI.getOperand(2).isImm())
2437 break;
2438 // Watch out for 1 << 12 shifted value.
2439 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2440 break;
2441
2442 // The update instruction source and destination register must be the
2443 // same as the load/store base register.
2444 if (MI.getOperand(0).getReg() != BaseReg ||
2445 MI.getOperand(1).getReg() != BaseReg)
2446 break;
2447
2448 int UpdateOffset = MI.getOperand(2).getImm();
2449 if (MI.getOpcode() == AArch64::SUBXri)
2450 UpdateOffset = -UpdateOffset;
2451
2452 // The immediate must be a multiple of the scaling factor of the pre/post
2453 // indexed instruction.
2454 int Scale, MinOffset, MaxOffset;
2455 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2456 if (UpdateOffset % Scale != 0)
2457 break;
2458
2459 // Scaled offset must fit in the instruction immediate.
2460 int ScaledOffset = UpdateOffset / Scale;
2461 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2462 break;
2463
2464 // If we have a non-zero Offset, we check that it matches the amount
2465 // we're adding to the register.
2466 if (!Offset || Offset == UpdateOffset)
2467 return true;
2468 break;
2469 }
2470 return false;
2471}
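// Editorial example (not part of the original source): for a load such as
//   ldr x0, [x1]
// the update
//   sub x1, x1, #16
// matches with UpdateOffset == -16, provided the value is a multiple of the
// pre/post-index scale reported by getPrePostIndexedMemOpInfo() and lies
// within its [MinOffset, MaxOffset] range.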
2472
2473bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2474 MachineInstr &MI,
2475 unsigned IndexReg,
2476 unsigned &Offset) {
2477 // The update instruction source and destination register must be the
2478 // same as the load/store index register.
2479 if (MI.getOpcode() == AArch64::MOVKWi &&
2480 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2481
2482 // A movz + movk pair holds a large offset for a Ld/St instruction.
2483 MachineBasicBlock::iterator B = MI.getParent()->begin();
2484 MachineBasicBlock::iterator MBBI = &MI;
2485 // Bail out when MI is the first instruction of the block.
2486 if (MBBI == B)
2487 return false;
2488 MBBI = prev_nodbg(MBBI, B);
2489 MachineInstr &MovzMI = *MBBI;
2490 // Make sure the MOVKWi and MOVZWi set the same register.
2491 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2492 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2493 unsigned Low = MovzMI.getOperand(1).getImm();
2494 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2495 Offset = High + Low;
2496 // 12-bit optionally shifted immediates are legal for adds.
2497 return Offset >> 24 == 0;
2498 }
2499 }
2500 return false;
2501}
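// Editorial example (not part of the original source) of the pattern matched
// above:
//   movz w8, #0x2340          ; Low  == 0x2340
//   movk w8, #0x1, lsl #16    ; High == 1 << 16 == 0x10000
//   ldr  x1, [x0, x8]
// gives Offset == 0x12340, accepted because Offset >> 24 == 0, i.e. the high
// part still fits an ADD with a shifted 12-bit immediate.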
2502
2503MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2504 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2505 MachineBasicBlock::iterator E = I->getParent()->end();
2506 MachineInstr &MemMI = *I;
2507 MachineBasicBlock::iterator MBBI = I;
2508
2509 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2510 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2511 TII->getMemScale(MemMI);
2512
2513 // Scan forward looking for post-index opportunities. Updating instructions
2514 // can't be formed if the memory instruction doesn't have the offset we're
2515 // looking for.
2516 if (MIUnscaledOffset != UnscaledOffset)
2517 return E;
2518
2519 // If the base register overlaps a source/destination register, we can't
2520 // merge the update. This does not apply to tag store instructions which
2521 // ignore the address part of the source register.
2522 // Nor does it apply to STGPi, which, unlike normal stores, does not have
2523 // unpredictable behavior in this case and always performs the writeback
2524 // after reading the source register value.
2525 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2526 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2527 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2528 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2529 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2530 return E;
2531 }
2532 }
2533
2534 // Track which register units have been modified and used between the first
2535 // insn (inclusive) and the second insn.
2536 ModifiedRegUnits.clear();
2537 UsedRegUnits.clear();
2538 MBBI = next_nodbg(MBBI, E);
2539
2540 // We can't post-increment the stack pointer if any instruction between
2541 // the memory access (I) and the increment (MBBI) can access the memory
2542 // region defined by [SP, MBBI].
2543 const bool BaseRegSP = BaseReg == AArch64::SP;
2544 if (BaseRegSP && needsWinCFI(I->getMF())) {
2545 // FIXME: For now, we always block the optimization over SP in Windows
2546 // targets as it requires adjusting the unwind/debug info; messing up
2547 // the unwind info can actually cause a miscompile.
2548 return E;
2549 }
2550
2551 unsigned Count = 0;
2552 MachineBasicBlock *CurMBB = I->getParent();
2553 // The choice of the next block to visit is based on live-in information.
2554 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2555
2556 while (true) {
2557 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2558 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2559 MachineInstr &MI = *MBBI;
2560
2561 // Don't count transient instructions towards the search limit since there
2562 // may be different numbers of them if e.g. debug information is present.
2563 if (!MI.isTransient())
2564 ++Count;
2565
2566 // If we found a match, return it.
2567 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2568 return MBBI;
2569
2570 // Update the status of what the instruction clobbered and used.
2571 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2572 TRI);
2573
2574 // Otherwise, if the base register is used or modified, we have no match,
2575 // so return early. If we are optimizing SP, do not allow instructions
2576 // that may load or store in between the load and the optimized value
2577 // update.
2578 if (!ModifiedRegUnits.available(BaseReg) ||
2579 !UsedRegUnits.available(BaseReg) ||
2580 (BaseRegSP && MBBI->mayLoadOrStore()))
2581 return E;
2582 }
2583
2584 if (!VisitSucc || Limit <= Count)
2585 break;
2586
2587 // Try to go downward to a successor along a control-flow path without side
2588 // entries, such that BaseReg is live along it but not at its exits.
2589 MachineBasicBlock *SuccToVisit = nullptr;
2590 unsigned LiveSuccCount = 0;
2591 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2592 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2593 if (Succ->isLiveIn(*AI)) {
2594 if (LiveSuccCount++)
2595 return E;
2596 if (Succ->pred_size() == 1)
2597 SuccToVisit = Succ;
2598 break;
2599 }
2600 }
2601 }
2602 if (!SuccToVisit)
2603 break;
2604 CurMBB = SuccToVisit;
2605 MBBI = CurMBB->begin();
2606 }
2607
2608 return E;
2609}
2610
2611MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2612 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2613 MachineBasicBlock::iterator B = I->getParent()->begin();
2614 MachineBasicBlock::iterator E = I->getParent()->end();
2615 MachineInstr &MemMI = *I;
2616 MachineBasicBlock::iterator MBBI = I;
2617 MachineFunction &MF = *MemMI.getMF();
2618
2619 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2620 int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
2621
2622 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2623 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2624 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2625 : AArch64::NoRegister};
2626
2627 // If the load/store is the first instruction in the block, there's obviously
2628 // not any matching update. Ditto if the memory offset isn't zero.
2629 if (MBBI == B || Offset != 0)
2630 return E;
2631 // If the base register overlaps a destination register, we can't
2632 // merge the update.
2633 if (!isTagStore(MemMI)) {
2634 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2635 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2636 return E;
2637 }
2638
2639 const bool BaseRegSP = BaseReg == AArch64::SP;
2640 if (BaseRegSP && needsWinCFI(I->getMF())) {
2641 // FIXME: For now, we always block the optimization over SP in Windows
2642 // targets as it requires adjusting the unwind/debug info; messing up
2643 // the unwind info can actually cause a miscompile.
2644 return E;
2645 }
2646
2647 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2648 unsigned RedZoneSize =
2649 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2650
2651 // Track which register units have been modified and used between the first
2652 // insn (inclusive) and the second insn.
2653 ModifiedRegUnits.clear();
2654 UsedRegUnits.clear();
2655 unsigned Count = 0;
2656 bool MemAccessBeforeSPPreInc = false;
2657 MergeEither = true;
2658 do {
2659 MBBI = prev_nodbg(MBBI, B);
2660 MachineInstr &MI = *MBBI;
2661
2662 // Don't count transient instructions towards the search limit since there
2663 // may be different numbers of them if e.g. debug information is present.
2664 if (!MI.isTransient())
2665 ++Count;
2666
2667 // If we found a match, return it.
2668 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2669 // Check that the update value is within our red zone limit (which may be
2670 // zero).
2671 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2672 return E;
2673 return MBBI;
2674 }
2675
2676 // Update the status of what the instruction clobbered and used.
2677 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2678
2679 // Otherwise, if the base register is used or modified, we have no match, so
2680 // return early.
2681 if (!ModifiedRegUnits.available(BaseReg) ||
2682 !UsedRegUnits.available(BaseReg))
2683 return E;
2684
2685 // If we have a destination register (i.e. a load instruction) and a
2686 // destination register is used or modified, then we can only merge forward,
2687 // i.e. the combined instruction is put in the place of the memory
2688 // instruction. Same applies if we see a memory access or side effects.
2689 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2690 (DestReg[0] != AArch64::NoRegister &&
2691 !(ModifiedRegUnits.available(DestReg[0]) &&
2692 UsedRegUnits.available(DestReg[0]))) ||
2693 (DestReg[1] != AArch64::NoRegister &&
2694 !(ModifiedRegUnits.available(DestReg[1]) &&
2695 UsedRegUnits.available(DestReg[1]))))
2696 MergeEither = false;
2697
2698 // Keep track of whether we have a memory access before an SP pre-increment;
2699 // in that case we need to validate later that the update amount respects the red
2700 // zone.
2701 if (BaseRegSP && MBBI->mayLoadOrStore())
2702 MemAccessBeforeSPPreInc = true;
2703 } while (MBBI != B && Count < Limit);
2704 return E;
2705}
2706
2707MachineBasicBlock::iterator
2708AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2709 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2710 MachineBasicBlock::iterator B = I->getParent()->begin();
2711 MachineBasicBlock::iterator E = I->getParent()->end();
2712 MachineInstr &MemMI = *I;
2713 MachineBasicBlock::iterator MBBI = I;
2714
2715 // If the load is the first instruction in the block, there's obviously
2716 // not any matching load or store.
2717 if (MBBI == B)
2718 return E;
2719
2720 // Make sure the IndexReg is killed and the shift amount is zero.
2721 // TODO: Relax this restriction to handle extends; keep the processing simple for now.
2722 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2723 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2724 AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0)
2725 return E;
2726
2727 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2728
2729 // Track which register units have been modified and used between the first
2730 // insn (inclusive) and the second insn.
2731 ModifiedRegUnits.clear();
2732 UsedRegUnits.clear();
2733 unsigned Count = 0;
2734 do {
2735 MBBI = prev_nodbg(MBBI, B);
2736 MachineInstr &MI = *MBBI;
2737
2738 // Don't count transient instructions towards the search limit since there
2739 // may be different numbers of them if e.g. debug information is present.
2740 if (!MI.isTransient())
2741 ++Count;
2742
2743 // If we found a match, return it.
2744 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2745 return MBBI;
2746 }
2747
2748 // Update the status of what the instruction clobbered and used.
2749 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2750
2751 // Otherwise, if the index register is used or modified, we have no match,
2752 // so return early.
2753 if (!ModifiedRegUnits.available(IndexReg) ||
2754 !UsedRegUnits.available(IndexReg))
2755 return E;
2756
2757 } while (MBBI != B && Count < Limit);
2758 return E;
2759}
2760
2761bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2762 MachineBasicBlock::iterator &MBBI) {
2763 MachineInstr &MI = *MBBI;
2764 // If this is a volatile load, don't mess with it.
2765 if (MI.hasOrderedMemoryRef())
2766 return false;
2767
2768 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2769 return false;
2770
2771 // Make sure this is a reg+imm.
2772 // FIXME: It is possible to extend it to handle reg+reg cases.
2773 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
2774 return false;
2775
2776 // Look backward up to LdStLimit instructions.
2777 MachineBasicBlock::iterator StoreI;
2778 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2779 ++NumLoadsFromStoresPromoted;
2780 // Promote the load. Keeping the iterator straight is a
2781 // pain, so we let the merge routine tell us what the next instruction
2782 // is after it's done mucking about.
2783 MBBI = promoteLoadFromStore(MBBI, StoreI);
2784 return true;
2785 }
2786 return false;
2787}
2788
2789// Merge adjacent zero stores into a wider store.
2790bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2791 MachineBasicBlock::iterator &MBBI) {
2792 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2793 MachineInstr &MI = *MBBI;
2794 MachineBasicBlock::iterator E = MI.getParent()->end();
2795
2796 if (!TII->isCandidateToMergeOrPair(MI))
2797 return false;
2798
2799 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2800 LdStPairFlags Flags;
2801 MachineBasicBlock::iterator MergeMI =
2802 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2803 if (MergeMI != E) {
2804 ++NumZeroStoresPromoted;
2805
2806 // Keeping the iterator straight is a pain, so we let the merge routine tell
2807 // us what the next instruction is after it's done mucking about.
2808 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2809 return true;
2810 }
2811 return false;
2812}
2813
2814// Find loads and stores that can be merged into a single load or store pair
2815// instruction.
2816bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2817 MachineInstr &MI = *MBBI;
2818 MachineBasicBlock::iterator E = MI.getParent()->end();
2819
2820 if (!TII->isCandidateToMergeOrPair(MI))
2821 return false;
2822
2823 // If the disable-ldp feature is set, do not emit ldp.
2824 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2825 return false;
2826
2827 // If the disable-stp feature is set, do not emit stp.
2828 if (MI.mayStore() && Subtarget->hasDisableStp())
2829 return false;
2830
2831 // Early exit if the offset is not possible to match. (6 bits of positive
2832 // range, plus allow an extra one in case we find a later insn that matches
2833 // with Offset-1)
2834 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2835 int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2836 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2837 // Allow one more for offset.
2838 if (Offset > 0)
2839 Offset -= OffsetStride;
2840 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2841 return false;
2842
2843 // Look ahead up to LdStLimit instructions for a pairable instruction.
2844 LdStPairFlags Flags;
2845 MachineBasicBlock::iterator Paired =
2846 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2847 if (Paired != E) {
2848 // Keeping the iterator straight is a pain, so we let the merge routine tell
2849 // us what the next instruction is after it's done mucking about.
2850 auto Prev = std::prev(MBBI);
2851
2852 // Fetch the memoperand of the load/store that is a candidate for
2853 // combination.
2854 MachineMemOperand *MemOp =
2855 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2856
2857 // If the ldp/stp-aligned-only feature is enabled for this load/store, check
2858 // that the alignment of the source pointer is at least double the alignment
2859 // of the type.
2860 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2861 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2862 // If there is no size/align information, cancel the transformation.
2863 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2864 NumFailedAlignmentCheck++;
2865 return false;
2866 }
2867
2868 // Get the needed alignments to check them if
2869 // ldp-aligned-only/stp-aligned-only features are enabled.
2870 uint64_t MemAlignment = MemOp->getAlign().value();
2871 uint64_t TypeAlignment =
2872 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2873
2874 if (MemAlignment < 2 * TypeAlignment) {
2875 NumFailedAlignmentCheck++;
2876 return false;
2877 }
2878 }
2879
2880 ++NumPairCreated;
2881 if (TII->hasUnscaledLdStOffset(MI))
2882 ++NumUnscaledPairCreated;
2883
2884 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2885 // Collect liveness info for instructions between Prev and the new position
2886 // MBBI.
2887 for (auto I = std::next(Prev); I != MBBI; I++)
2888 updateDefinedRegisters(*I, DefinedInBB, TRI);
2889
2890 return true;
2891 }
2892 return false;
2893}
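// Editorial note (not part of the original source): with ldp-aligned-only or
// stp-aligned-only, pairing two 8-byte accesses requires the memoperand to
// show at least 16-byte alignment (MemAlignment >= 2 * TypeAlignment); e.g.
// two X-register loads from a pointer only known to be 8-byte aligned are
// left unpaired on such subtargets.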
2894
2895bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2896 (MachineBasicBlock::iterator &MBBI) {
2897 MachineInstr &MI = *MBBI;
2898 MachineBasicBlock::iterator E = MI.getParent()->end();
2899 MachineBasicBlock::iterator Update;
2900
2901 // Look forward to try to form a post-index instruction. For example,
2902 // ldr x0, [x20]
2903 // add x20, x20, #32
2904 // merged into:
2905 // ldr x0, [x20], #32
2906 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2907 if (Update != E) {
2908 // Merge the update into the ld/st.
2909 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2910 /*IsPreIdx=*/false,
2911 /*MergeEither=*/false)) {
2912 MBBI = *NextI;
2913 return true;
2914 }
2915 }
2916
2917 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2918 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2919 return false;
2920
2921 // Look back to try to find a pre-index instruction. For example,
2922 // add x0, x0, #8
2923 // ldr x1, [x0]
2924 // merged into:
2925 // ldr x1, [x0, #8]!
2926 bool MergeEither;
2927 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2928 if (Update != E) {
2929 // Merge the update into the ld/st.
2930 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2931 /*IsPreIdx=*/true, MergeEither)) {
2932 MBBI = *NextI;
2933 return true;
2934 }
2935 }
2936
2937 // The immediate in the load/store is scaled by the size of the memory
2938 // operation. The immediate in the add we're looking for,
2939 // however, is not, so adjust here.
2940 int UnscaledOffset =
2941 AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2942
2943 // Look forward to try to find a pre-index instruction. For example,
2944 // ldr x1, [x0, #64]
2945 // add x0, x0, #64
2946 // merged into:
2947 // ldr x1, [x0, #64]!
2948 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2949 if (Update != E) {
2950 // Merge the update into the ld/st.
2951 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2952 /*IsPreIdx=*/true,
2953 /*MergeEither=*/false)) {
2954 MBBI = *NextI;
2955 return true;
2956 }
2957 }
2958
2959 return false;
2960}
2961
2962bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2963 int Scale) {
2964 MachineInstr &MI = *MBBI;
2965 MachineBasicBlock::iterator E = MI.getParent()->end();
2966 MachineBasicBlock::iterator Update;
2967
2968 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2969 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2970 return false;
2971
2972 // Look back to try to find a const offset for index LdSt instruction. For
2973 // example,
2974 // mov x8, #LargeImm ; = a * (1<<12) + imm12
2975 // ldr x1, [x0, x8]
2976 // merged into:
2977 // add x8, x0, a * (1<<12)
2978 // ldr x1, [x8, imm12]
2979 unsigned Offset;
2980 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
2981 if (Update != E && (Offset & (Scale - 1)) == 0) {
2982 // Merge the imm12 into the ld/st.
2983 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
2984 return true;
2985 }
2986
2987 return false;
2988}
2989
2990bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2991 bool EnableNarrowZeroStOpt) {
2992 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
2993
2994 bool Modified = false;
2995 // Four transformations to do here:
2996 // 1) Find loads that directly read from stores and promote them by
2997 // replacing with mov instructions. If the store is wider than the load,
2998 // the load will be replaced with a bitfield extract.
2999 // e.g.,
3000 // str w1, [x0, #4]
3001 // ldrh w2, [x0, #6]
3002 // ; becomes
3003 // str w1, [x0, #4]
3004 // lsr w2, w1, #16
3005 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3006 MBBI != E;) {
3007 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3008 Modified = true;
3009 else
3010 ++MBBI;
3011 }
3012 // 2) Merge adjacent zero stores into a wider store.
3013 // e.g.,
3014 // strh wzr, [x0]
3015 // strh wzr, [x0, #2]
3016 // ; becomes
3017 // str wzr, [x0]
3018 // e.g.,
3019 // str wzr, [x0]
3020 // str wzr, [x0, #4]
3021 // ; becomes
3022 // str xzr, [x0]
3023 if (EnableNarrowZeroStOpt)
3024 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3025 MBBI != E;) {
3026 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3027 Modified = true;
3028 else
3029 ++MBBI;
3030 }
3031 // 3) Find loads and stores that can be merged into a single load or store
3032 // pair instruction.
3033 // When compiling for SVE 128, also try to combine SVE fill/spill
3034 // instructions into LDP/STP.
3035 // e.g.,
3036 // ldr x0, [x2]
3037 // ldr x1, [x2, #8]
3038 // ; becomes
3039 // ldp x0, x1, [x2]
3040 // e.g.,
3041 // ldr z0, [x2]
3042 // ldr z1, [x2, #1, mul vl]
3043 // ; becomes
3044 // ldp q0, q1, [x2]
3045
3046 if (MBB.getParent()->getRegInfo().tracksLiveness()) {
3047 DefinedInBB.clear();
3048 DefinedInBB.addLiveIns(MBB);
3049 }
3050
3051 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3052 MBBI != E;) {
3053 // Track currently live registers up to this point, to help with
3054 // searching for a rename register on demand.
3055 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3056 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3057 Modified = true;
3058 else
3059 ++MBBI;
3060 }
3061 // 4) Find base register updates that can be merged into the load or store
3062 // as a base-reg writeback.
3063 // e.g.,
3064 // ldr x0, [x2]
3065 // add x2, x2, #4
3066 // ; becomes
3067 // ldr x0, [x2], #4
3068 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3069 MBBI != E;) {
3070 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3071 Modified = true;
3072 else
3073 ++MBBI;
3074 }
3075
3076 // 5) Find a register assigned with a const value that can be folded
3077 // into the load or store. e.g.,
3078 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3079 // ldr x1, [x0, x8]
3080 // ; becomes
3081 // add x8, x0, a * (1<<12)
3082 // ldr x1, [x8, imm12]
3083 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3084 MBBI != E;) {
3085 int Scale;
3086 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3087 Modified = true;
3088 else
3089 ++MBBI;
3090 }
3091
3092 return Modified;
3093}
3094
3095bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3096 if (skipFunction(Fn.getFunction()))
3097 return false;
3098
3099 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3100 TII = Subtarget->getInstrInfo();
3101 TRI = Subtarget->getRegisterInfo();
3102 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3103
3104 // Resize the modified and used register unit trackers. We do this once
3105 // per function and then clear the register units each time we optimize a load
3106 // or store.
3107 ModifiedRegUnits.init(*TRI);
3108 UsedRegUnits.init(*TRI);
3109 DefinedInBB.init(*TRI);
3110
3111 bool Modified = false;
3112 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3113 for (auto &MBB : Fn) {
3114 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3115 Modified |= M;
3116 }
3117
3118 return Modified;
3119}
3120
3121// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3122// stores near one another? Note: The pre-RA instruction scheduler already has
3123// hooks to try and schedule pairable loads/stores together to improve pairing
3124 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
3125
3126// FIXME: When pairing store instructions it's very possible for this pass to
3127// hoist a store with a KILL marker above another use (without a KILL marker).
3128// The resulting IR is invalid, but nothing uses the KILL markers after this
3129// pass, so it's never caused a problem in practice.
3130
3131/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3132/// load / store optimization pass.
3133FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
3134 return new AArch64LoadStoreOpt();
3135}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static bool isRewritableImplicitDef(unsigned Opc)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
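Together with getMemScale, these accessors are what the candidate checks use to reason about addresses. A hedged sketch of the kind of comparison they enable (FirstMI and MI are assumed to be two scaled, unsigned-immediate load/store MachineInstrs; this is not the pass's exact code):

  bool SameBase =
      getLdStBaseOp(FirstMI).getReg() == getLdStBaseOp(MI).getReg();
  int64_t Delta =
      getLdStOffsetOp(MI).getImm() - getLdStOffsetOp(FirstMI).getImm();
  // For the scaled unsigned-immediate forms the immediate is already in units
  // of the access size, so "adjacent" means a delta of exactly one.
  bool Adjacent = SameBase && std::abs(Delta) == 1;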
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static bool shouldExecute(unsigned CounterName)
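DebugCounter::shouldExecute is the hook behind the RegRenamingCounter declared near the top of the file; it lets individual renaming decisions be switched off from the command line when bisecting a miscompile. Illustrative use (the surrounding condition is a sketch, not the exact code in the pass):

  // Skip register renaming for this candidate if the debug counter says so.
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return false;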
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:681
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
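The LiveRegUnits interface above is how the pass tracks which physical registers are read or written between two candidate instructions. A sketch of that pattern, assuming First and Second are iterators bounding the region of interest, TRI is the TargetRegisterInfo, and the choice of X16 is arbitrary (hypothetical surrounding code, not the pass's exact logic):

  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (MachineInstr &MI : make_range(First, Second))
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
  // A register is safe to reuse across the region only if nothing in between
  // defines or reads it.
  bool Untouched = ModifiedRegUnits.available(AArch64::X16) &&
                   UsedRegUnits.available(AArch64::X16);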
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
OpType getOperation() const
Definition MCDwarf.h:720
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
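splice is the primitive used when a merged instruction or a folded base-register update has to physically move: it unlinks one instruction and relinks it before another without creating or destroying anything. Illustrative call (Where and From are assumed iterators in MBB):

  // Move the instruction at From so that it sits immediately before Where.
  MBB.splice(Where, &MBB, From);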
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
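These MachineInstrBuilder calls are the building blocks the pass chains together when it emits a merged instruction. A hedged sketch of an STP built from two adjacent 64-bit stores; the register choices and the offset are hypothetical, and MBB, InsertPt, DL, TII, FirstStore, and SecondStore are assumed to be in scope (the real merge code derives every operand from the original instructions):

  MachineInstrBuilder MIB =
      BuildMI(*MBB, InsertPt, DL, TII->get(AArch64::STPXi))
          .addReg(AArch64::X0)   // first data register (hypothetical)
          .addReg(AArch64::X1)   // second data register (hypothetical)
          .addReg(AArch64::SP)   // common base register (hypothetical)
          .addImm(2)             // offset in 8-byte units, i.e. [sp, #16]
          .cloneMergedMemRefs({&*FirstStore, &*SecondStore})
          .setMIFlags(MachineInstr::NoFlags);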
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
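peekDebugInstrNum, getDebugInstrNum, and makeDebugValueSubstitution (listed above under MachineFunction) are the pieces addDebugSubstitutionsToTable glues together so that debug values still resolve after two memory operations are merged. A sketch of the idea, assuming MF, OriginalInstr, and MergedInstr follow the addDebugSubstitutionsToTable signature shown earlier; the operand indices here are illustrative:

  // If the original instruction carried a debug instruction number, point
  // debug users of its operand 0 at operand 0 of the merged instruction.
  if (unsigned OldInstrNum = OriginalInstr.peekDebugInstrNum()) {
    unsigned NewInstrNum = MergedInstr.getDebugInstrNum();
    MF->makeDebugValueSubstitution({OldInstrNum, 0}, {NewInstrNum, 0});
  }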
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
Definition ilist_node.h:123
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount. imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 111 ==> msl.
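The two AArch64_AM helpers above round-trip the shift encoding used by the shifted-register addressing forms; for example:

  unsigned Enc = AArch64_AM::getShifterImm(AArch64_AM::LSL, 3); // encode lsl #3
  unsigned Amt = AArch64_AM::getShiftValue(Enc);                // Amt == 3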
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
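next_nodbg and prev_nodbg are what keep DBG_VALUE instructions from counting against the scan limits. A sketch of the forward-scan shape they enable, assuming MBBI points at the first instruction of interest and LdStLimit bounds the search as described earlier (not the pass's exact loop):

  unsigned Count = 0;
  MachineBasicBlock::iterator E = MBBI->getParent()->end();
  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E);
       I != E && Count < LdStLimit; I = next_nodbg(I, E), ++Count) {
    MachineInstr &Candidate = *I;
    // ... inspect Candidate as a merge or pairing candidate ...
  }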
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.