//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonStoreWidening:
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
// For example, replace:
//   S4_storeirb_io %100, 0, 0   ; store-immediate-byte
//   S4_storeirb_io %100, 1, 0   ; store-immediate-byte
// with
//   S4_storeirh_io %100, 0, 0   ; store-immediate-halfword
// The above is the general idea. The actual cases handled by the code
// may be a bit more complex.
// The purpose of this pass is to reduce the number of outstanding stores,
// or as one could say, "reduce store queue pressure". Merging narrow stores
// into wider ones also means fewer stores, and since only two memory
// instructions are allowed per packet, it means fewer packets, and
// ultimately fewer cycles.
//
// HexagonLoadWidening does the same thing as HexagonStoreWidening, but
// for loads. Here, we try to replace 4-byte loads with register-pair loads.
// For example, replace:
//   %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
//   %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
// with
//   %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
//   %2:intregs = COPY %4.isub_lo:doubleregs
//   %3:intregs = COPY %4.isub_hi:doubleregs
//
// LoadWidening for 8- and 16-bit loads is not useful, as we would end up
// generating 2N insts to replace N loads: 1 widened load, N bitwise ANDs,
// and N - 1 shifts.

//===---------------------------------------------------------------------===//

#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "hexagon-load-store-widening"

static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
    "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
    cl::desc("Limit block size to analyze in load/store widening pass"));

namespace llvm {

FunctionPass *createHexagonStoreWidening();
FunctionPass *createHexagonLoadWidening();
void initializeHexagonStoreWideningPass(PassRegistry &);
void initializeHexagonLoadWideningPass(PassRegistry &);

} // end namespace llvm

namespace {

struct HexagonLoadStoreWidening {
  enum WideningMode { Store, Load };
  const HexagonInstrInfo *TII;
  const HexagonRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  AliasAnalysis *AA;
  MachineFunction *MF;

public:
  HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
                           const HexagonRegisterInfo *TRI,
                           MachineRegisterInfo *MRI, AliasAnalysis *AA,
                           MachineFunction *MF, bool StoreMode)
      : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
        Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
        HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}

  bool run();

private:
  const bool Mode;
  const unsigned MaxWideSize = 8;
  const HexagonInstrInfo *HII = nullptr;

  using InstrSet = SmallPtrSet<MachineInstr *, 16>;
  using InstrGroup = SmallVector<MachineInstr *, 8>;
  using InstrGroupList = SmallVector<InstrGroup, 8>;

  InstrSet ProcessedInsts;

  unsigned getBaseAddressRegister(const MachineInstr *MI);
  int64_t getOffset(const MachineInstr *MI);
  int64_t getPostIncrementValue(const MachineInstr *MI);
  bool handledInstType(const MachineInstr *MI);

  void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
  void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
  bool processBasicBlock(MachineBasicBlock &MBB);
  bool processGroup(InstrGroup &Group);
  bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
                   InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
  bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
  bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
  bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
  bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
  bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
  bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
};

struct HexagonStoreWidening : public MachineFunctionPass {
  static char ID;

  HexagonStoreWidening() : MachineFunctionPass(ID) {
    initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "Hexagon Store Widening"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MFn) override {
    if (skipFunction(MFn.getFunction()))
      return false;

    auto &ST = MFn.getSubtarget<HexagonSubtarget>();
    const HexagonInstrInfo *TII = ST.getInstrInfo();
    const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
    MachineRegisterInfo *MRI = &MFn.getRegInfo();
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

    return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run();
  }
};

struct HexagonLoadWidening : public MachineFunctionPass {
  static char ID;

  HexagonLoadWidening() : MachineFunctionPass(ID) {
    initializeHexagonLoadWideningPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return "Hexagon Load Widening"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MFn) override {
    if (skipFunction(MFn.getFunction()))
      return false;

    auto &ST = MFn.getSubtarget<HexagonSubtarget>();
    const HexagonInstrInfo *TII = ST.getInstrInfo();
    const HexagonRegisterInfo *TRI = ST.getRegisterInfo();
    MachineRegisterInfo *MRI = &MFn.getRegInfo();
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run();
  }
};

char HexagonStoreWidening::ID = 0;
char HexagonLoadWidening::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
                      "Hexagon Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
                    "Hexagon Store Widening", false, false)

INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
                      "Hexagon Load Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
                    "Hexagon Load Widening", false, false)

static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
  assert(!MI->memoperands_empty() && "Expecting memory operands");
  return **MI->memoperands_begin();
}

unsigned
HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
  unsigned Base, Offset;
  HII->getBaseAndOffsetPosition(*MI, Base, Offset);
  const MachineOperand &MO = MI->getOperand(Base);
  assert(MO.isReg() && "Expecting register operand");
  return MO.getReg();
}

int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");

  // On Hexagon, post-increment accesses always have an offset of 0;
  // there is no offset operand to a post-increment.
  if (HII->isPostIncrement(*MI))
    return 0;

  unsigned Base, Offset;

  HII->getBaseAndOffsetPosition(*MI, Base, Offset);
  const MachineOperand &MO = MI->getOperand(Offset);
  switch (MO.getType()) {
  case MachineOperand::MO_Immediate:
    return MO.getImm();
  case MachineOperand::MO_GlobalAddress:
    return MO.getOffset();
  default:
    break;
  }
  llvm_unreachable("Expecting an immediate or global operand");
}

inline int64_t
HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
  unsigned Base, PostIncIdx;
  HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx);
  const MachineOperand &MO = MI->getOperand(PostIncIdx);
  return MO.getImm();
}

// Filtering function: any loads/stores whose opcodes are not "approved" of by
// this function will not be subjected to widening.
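// Illustrative sketch (operand order shown informally, not authoritative):
// with the base address in %0,
//   S4_storeirb_io %0, 0, 1            ; handled: store-immediate forms
//   S2_storeri_io %0, 8, %1            ; handled: 32-bit register store
//   S2_storerb_io %0, 0, %1            ; not handled: 8/16-bit register store
//   %2:intregs = L2_loadri_io %0, 4    ; handled in load mode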
inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  if (Mode == WideningMode::Store) {
    switch (Opc) {
    case Hexagon::S4_storeirb_io:
    case Hexagon::S4_storeirh_io:
    case Hexagon::S4_storeiri_io:
    case Hexagon::S2_storeri_io:
      // Base address must be a register. (Implement FI later.)
      return MI->getOperand(0).isReg();
    case Hexagon::S2_storeri_pi:
      return MI->getOperand(1).isReg();
    }
  } else {
    // Widening 8- and 16-bit loads would take 2x instructions to replace x
    // loads, so we only widen 32-bit loads, where no AND and SHIFT ops are
    // needed afterwards to select the right bits.
    switch (Opc) {
    case Hexagon::L2_loadri_io:
      // Base address must be a register and offset must be immediate.
      return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
             MI->getOperand(2).isImm();
    case Hexagon::L2_loadri_pi:
      return !MI->memoperands_empty() && MI->getOperand(2).isReg();
    }
  }
  return false;
}

static void addDefsUsesToList(const MachineInstr *MI,
                              DenseSet<Register> &RegDefs,
                              DenseSet<Register> &RegUses) {
  for (const auto &Op : MI->operands()) {
    if (!Op.isReg())
      continue;
    if (Op.isDef())
      RegDefs.insert(Op.getReg());
    if (Op.readsReg())
      RegUses.insert(Op.getReg());
  }
}

bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
                                                   const MachineInstr *B) {
  DenseSet<Register> ARegDefs;
  DenseSet<Register> ARegUses;
  addDefsUsesToList(A, ARegDefs, ARegUses);
  if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
      (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
    return false;
  for (const auto &BOp : B->operands()) {
    if (!BOp.isReg())
      continue;
    if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
      return false;
    if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
      return false;
  }
  return true;
}

// Inspect a machine basic block, and generate groups out of loads/stores
// encountered in the block.
//
// A load/store group is a group of loads or stores that use the same base
// register, and which can be reordered within that group without altering the
// semantics of the program. A single group could be widened as
// a whole, if there existed a single load/store instruction with the same
// semantics as the entire group. In many cases, a single group may need more
// than one wide load or store.
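//
// Illustrative sketch: with all stores based on %0,
//   S4_storeirb_io %0, 0, 1
//   %5:intregs = A2_add %3, %4       ; unrelated, does not break the group
//   S4_storeirb_io %0, 1, 2
//   S4_storeirh_io %0, 2, 3
// forms a single group, which later widening may cover with one or more
// wider stores.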
void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB,
                                            InstrGroupList &StoreGroups) {
  // Traverse all instructions; whenever we encounter a load/store, try to
  // create a group starting at that instruction, i.e. a sequence of
  // independent loads/stores that can be widened.
  for (auto I = MBB.begin(); I != MBB.end(); ++I) {
    MachineInstr *MI = &(*I);
    if (!handledInstType(MI))
      continue;
    if (ProcessedInsts.count(MI))
      continue;

    // Found a load/store. Try to create a group starting at it.
    InstrGroup G;
    createGroup(MI, G);
    if (G.size() > 1)
      StoreGroups.push_back(G);
  }
}

// Create a single load/store group. The insts need to be independent between
// themselves, and also there cannot be other instructions between them
// that could read or modify storage being read from or stored into.
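//
// Illustrative sketch: if the block contains
//   S4_storeirb_io %0, 0, 1
//   <a store through an unrelated pointer that may alias %0>
//   S4_storeirb_io %0, 1, 2
// the group started at the first store is terminated before the third
// instruction can join it, because the intervening store cannot be reordered
// with respect to the group members.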
void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst,
                                           InstrGroup &Group) {
  assert(handledInstType(BaseInst) && "Unexpected instruction");
  unsigned BaseReg = getBaseAddressRegister(BaseInst);
  InstrGroup Other;

  Group.push_back(BaseInst);
  LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump());
  auto End = BaseInst->getParent()->end();
  auto I = BaseInst->getIterator();

  while (true) {
    I = std::next(I);
    if (I == End)
      break;
    MachineInstr *MI = &(*I);

    // Assume calls are aliased to everything.
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        MI->hasOrderedMemoryRef())
      return;

    if (!handledInstType(MI)) {
      if (MI->mayLoadOrStore())
        Other.push_back(MI);
      continue;
    }

    // We have a handledInstType instruction.
    // If this load/store instruction is aliased with anything already in the
    // group, terminate the group now.
    for (auto GI : Group)
      if (GI->mayAlias(AA, *MI, true))
        return;
    if (Mode == WideningMode::Load) {
      // Check if current load MI can be moved to the first load instruction
      // in Group. If any load instruction aliases with memory instructions in
      // Other, terminate the group.
      for (auto MemI : Other)
        if (!canSwapInstructions(MI, MemI))
          return;
    } else {
      // Check if store instructions in the group can be moved to current
      // store MI. If any store instruction aliases with memory instructions
      // in Other, terminate the group.
      for (auto MemI : Other) {
        if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <=
            0)
          continue;
        for (auto GI : Group)
          if (!canSwapInstructions(MemI, GI))
            return;
      }
    }

    unsigned BR = getBaseAddressRegister(MI);
    if (BR == BaseReg) {
      LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump());
      Group.push_back(MI);
      ProcessedInsts.insert(MI);
    }
  } // while
}

// Check if load/store instructions S1 and S2 are adjacent. More precisely,
// S2 has to access memory immediately following that accessed by S1.
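// For example, a 4-byte store at offset 0 is adjacent to one at offset 4,
// and a 4-byte store at offset -8 is adjacent to one at offset -4; the
// signed/unsigned handling below keeps both cases correct.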
bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1,
                                           const MachineInstr *S2) {
  if (!handledInstType(S1) || !handledInstType(S2))
    return false;

  const MachineMemOperand &S1MO = getMemTarget(S1);

  // Currently only handling immediate stores.
  int Off1 = getOffset(S1);
  int Off2 = getOffset(S2);

  return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2)
                     : int(Off1 + S1MO.getSize().getValue()) == Off2;
}

/// Given a sequence of adjacent loads/stores, and a maximum size of a single
/// wide inst, pick a group of insts that can be replaced by a single load/store
/// of size not exceeding MaxSize. The selected sequence will be recorded
/// in OG ("old group" of instructions).
/// OG should be empty on entry, and should be left empty if the function
/// fails.
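///
/// Illustrative sketch: for byte stores of sizes 1, 1, 1, 1 at offsets
/// 0, 1, 2, 3 from a base aligned to at least 8, the run adds up to 4 bytes
/// (a power of 2), so all four are selected and TotalSize becomes 4. For
/// sizes 1, 1, 2 at offsets 0, 1, 2 the same happens with three insts.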
bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin,
                                           InstrGroup::iterator End,
                                           InstrGroup &OG, unsigned &TotalSize,
                                           unsigned MaxSize) {
  assert(Begin != End && "No instructions to analyze");
  assert(OG.empty() && "Old group not empty on entry");

  if (std::distance(Begin, End) <= 1)
    return false;

  MachineInstr *FirstMI = *Begin;
  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
  const MachineMemOperand &FirstMMO = getMemTarget(FirstMI);
  if (!FirstMMO.getType().isValid())
    return false;

  unsigned Alignment = FirstMMO.getAlign().value();
  unsigned SizeAccum = FirstMMO.getSize().getValue();
  unsigned FirstOffset = getOffset(FirstMI);

  // The initial value of SizeAccum should always be a power of 2.
  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");

  // If the size of the first store equals or exceeds the limit, do nothing.
  if (SizeAccum >= MaxSize)
    return false;

  // If the size of the first load/store is greater than or equal to its
  // alignment, then the inst cannot be made any wider.
  if (SizeAccum >= Alignment) {
    LLVM_DEBUG(dbgs() << "Size of load/store greater than or equal to its "
                         "alignment\n");
    return false;
  }

  // The offset of a load/store will put restrictions on how wide the inst can
  // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest
  // bits be 0. If the first inst already exhausts the offset limits, quit.
  // Test this by checking if the next wider size would exceed the limit.
  // For post-increment instructions, the increment amount needs to follow the
  // same rule.
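  // Worked example: if SizeAccum is 4, the next wider access is 8 bytes, so
  // the test below masks with (2 * 4 - 1) = 0b111. An offset (or increment)
  // of 8 passes (8 & 7 == 0) while one of 4 fails (4 & 7 != 0), since a
  // 4-byte access at offset 4 cannot be folded into an 8-byte access there.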
  unsigned OffsetOrIncVal = 0;
  if (HII->isPostIncrement(*FirstMI))
    OffsetOrIncVal = getPostIncrementValue(FirstMI);
  else
    OffsetOrIncVal = FirstOffset;
  if ((2 * SizeAccum - 1) & OffsetOrIncVal) {
    LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc"
                      << " value: " << OffsetOrIncVal
                      << " is invalid in the widened version\n");
    return false;
  }

  OG.push_back(FirstMI);
  MachineInstr *S1 = FirstMI;

  // Pow2Num will be the largest number of elements in OG such that the sum
  // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2.
  unsigned Pow2Num = 1;
  unsigned Pow2Size = SizeAccum;
  bool HavePostInc = HII->isPostIncrement(*S1);

  // Be greedy: keep accumulating insts as long as they are to adjacent
  // memory locations, and as long as the total number of bytes stored
  // does not exceed the limit (MaxSize).
  // Keep track of when the total size covered is a power of 2, since
  // this is a size a single load/store can cover.
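  // Illustrative run (assuming the base is sufficiently aligned): for member
  // sizes 2, 1, 1, 2 the running total is 2, 3, 4, 6. Pow2Num/Pow2Size are
  // updated at totals 2 and 4, so OG is later trimmed back to the first three
  // insts and TotalSize becomes 4; the trailing 2-byte inst is not widened as
  // part of this group.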
  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
    MachineInstr *S2 = *I;
    // Insts are sorted, so if S1 and S2 are not adjacent, there won't be
    // any other store to fill the "hole".
    if (!areAdjacent(S1, S2))
      break;

    // Cannot widen two post increments, need to return two registers
    // with incremented values.
    if (HavePostInc && HII->isPostIncrement(*S2))
      break;

    unsigned S2Size = getMemTarget(S2).getSize().getValue();
    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
      break;

    OG.push_back(S2);
    SizeAccum += S2Size;
    if (isPowerOf2_32(SizeAccum)) {
      Pow2Num = OG.size();
      Pow2Size = SizeAccum;
    }
    if ((2 * Pow2Size - 1) & FirstOffset)
      break;

    S1 = S2;
  }

  // The insts don't add up to anything that can be widened. Clean up.
  if (Pow2Num <= 1) {
    OG.clear();
    return false;
  }

  // Only leave the loads/stores being widened.
  OG.resize(Pow2Num);
  TotalSize = Pow2Size;
  return true;
}

/// Given an "old group" OG of insts, create a "new group" NG of instructions
/// to replace them.
bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG,
                                               unsigned TotalSize) {
  if (Mode == WideningMode::Store) {
    return createWideStores(OG, NG, TotalSize);
  }
  return createWideLoads(OG, NG, TotalSize);
}

/// Given an "old group" OG of stores, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for store-immediate.
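///
/// Illustrative sketch: two adjacent byte store-immediates, #1 at offset 0
/// and #2 at offset 1, accumulate (little-endian) into Acc = 0x0201 with
/// TotalSize = 2, and are emitted as a single S4_storeirh_io of that value.
/// Register stores are instead combined into a double register with
/// A2_combinew and emitted as a single S2_storerd_io / S2_storerd_pi.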
bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
                                                unsigned TotalSize) {
  // XXX Current limitations:
  // - only handle a TotalSize of up to 8

  LLVM_DEBUG(dbgs() << "Creating wide stores\n");
  if (TotalSize > MaxWideSize)
    return false;

  uint64_t Acc = 0; // Value accumulator.
  unsigned Shift = 0;
  bool HaveImm = false;
  bool HaveReg = false;

  for (MachineInstr *MI : OG) {
    const MachineMemOperand &MMO = getMemTarget(MI);
    MachineOperand &SO = HII->isPostIncrement(*MI)
                             ? MI->getOperand(3)
                             : MI->getOperand(2); // Source.
    unsigned NBits;
    uint64_t Mask;
    uint64_t Val;

    switch (SO.getType()) {
    case MachineOperand::MO_Immediate:
      LLVM_DEBUG(dbgs() << "Have store immediate\n");
      HaveImm = true;

      NBits = MMO.getSizeInBits().toRaw();
      Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits));
      Val = (SO.getImm() & Mask) << Shift;
      Acc |= Val;
      Shift += NBits;
      break;
    case MachineOperand::MO_Register:
      HaveReg = true;
      break;
    default:
      LLVM_DEBUG(dbgs() << "Unhandled store\n");
      return false;
    }
  }

  if (HaveImm && HaveReg) {
    LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n");
    return false;
  }

  MachineInstr *FirstSt = OG.front();
  DebugLoc DL = OG.back()->getDebugLoc();
  const MachineMemOperand &OldM = getMemTarget(FirstSt);
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());
  MachineInstr *StI;
  MachineOperand &MR =
      (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1)
                                      : FirstSt->getOperand(0));
  auto SecondSt = OG.back();
  if (HaveReg) {
    MachineOperand FReg =
        (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3)
                                        : FirstSt->getOperand(2));
    // Post increments appear first in the sorted group.
    // Cannot have a post increment for the second instruction.
    assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc");
    MachineOperand SReg = SecondSt->getOperand(2);
    assert(FReg.isReg() && SReg.isReg() &&
           "Cannot merge store register and store imm");
    const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew);
    Register VReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg);
    NG.push_back(CombI);

    if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) {
      const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi);
      auto IncDestMO = FirstSt->getOperand(0);
      auto IncMO = FirstSt->getOperand(2);
      StI =
          BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg);
    } else {
      const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
      auto OffMO = FirstSt->getOperand(1);
      StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg);
    }
    StI->addMemOperand(*MF, NewM);
    NG.push_back(StI);
    return true;
  }

  // Handle store immediates.
  // There are no post increment store immediates on Hexagon.
  assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc");
  auto Off = FirstSt->getOperand(1).getImm();
  if (TotalSize == 8) {
    // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg
    uint64_t Mask = 0xFFFFFFFFU;
    int LowerAcc = int(Mask & Acc);
    int UpperAcc = Acc >> 32;
    Register DReg =
        MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass);
    MachineInstr *CombI;
    if (Acc != 0) {
      const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
      const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
      Register VReg = MF->getRegInfo().createVirtualRegister(RC);
      MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
      NG.push_back(TfrI);
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir);
      CombI = BuildMI(*MF, DL, CombD, DReg)
                  .addImm(UpperAcc)
                  .addReg(VReg, RegState::Kill);
    }
    // If immediates are 0, we do not need A2_tfrsi.
    else {
      const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii);
      CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0);
    }
    NG.push_back(CombI);
    const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill);
  } else if (Acc < 0x10000) {
    // Create mem[hw] = #Acc
    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S4_storeirh_io
                    : (TotalSize == 4) ? Hexagon::S4_storeiri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
    const MCInstrDesc &StD = TII->get(WOpc);
    StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val);
  } else {
    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
    const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
    const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
    Register VReg = MF->getRegInfo().createVirtualRegister(RC);
    MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
    NG.push_back(TfrI);

    unsigned WOpc = (TotalSize == 2)   ? Hexagon::S2_storerh_io
                    : (TotalSize == 4) ? Hexagon::S2_storeri_io
                                       : 0;
    assert(WOpc && "Unexpected size");

    const MCInstrDesc &StD = TII->get(WOpc);
    StI =
        BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill);
  }
  StI->addMemOperand(*MF, NewM);
  NG.push_back(StI);

  return true;
}

/// Given an "old group" OG of loads, create a "new group" NG of instructions
/// to replace them. NG contains the wide double-register load plus the COPYs
/// that extract its low and high halves into the original destinations.
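///
/// Illustrative sketch of the post-increment case (operand order shown
/// informally): two adjacent 32-bit loads off the same base become roughly
///   %d:doubleregs, %b1:intregs = L2_loadrd_pi %b:intregs, <inc>
///   %lo:intregs = COPY %d.isub_lo
///   %hi:intregs = COPY %d.isub_hi
/// mirroring the non-post-increment example in the file header.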
bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG,
                                               unsigned TotalSize) {
  LLVM_DEBUG(dbgs() << "Creating wide loads\n");
  // XXX Current limitations:
  // - only expect two adjacent 32-bit loads in OG,
  // - only handle a TotalSize of up to 8
  if (TotalSize > MaxWideSize)
    return false;
  assert(OG.size() == 2 && "Expecting two elements in Instruction Group.");

  MachineInstr *FirstLd = OG.front();
  const MachineMemOperand &OldM = getMemTarget(FirstLd);
  MachineMemOperand *NewM =
      MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
                               TotalSize, OldM.getAlign(), OldM.getAAInfo());

  MachineOperand &MR = FirstLd->getOperand(0);
  MachineOperand &MRBase =
      (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2)
                                      : FirstLd->getOperand(1));
  DebugLoc DL = OG.back()->getDebugLoc();

  // Create the double register load instruction.
  Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  MachineInstr *LdI;

  // Post increments appear first in the sorted group.
  if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) {
    auto IncDestMO = FirstLd->getOperand(1);
    auto IncMO = FirstLd->getOperand(3);
    LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi))
              .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
              .add(IncDestMO)
              .add(MRBase)
              .add(IncMO);
    LdI->addMemOperand(*MF, NewM);
  } else {
    auto OffMO = FirstLd->getOperand(2);
    LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io))
              .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg())
              .add(MRBase)
              .add(OffMO);
    LdI->addMemOperand(*MF, NewM);
  }
  NG.push_back(LdI);

  auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg,
                        MachineInstr *DstReg) {
    Register DestReg = DstReg->getOperand(0).getReg();
    return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg)
        .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg);
  };

  MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd);
  MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back());
  NG.push_back(LdI_lo);
  NG.push_back(LdI_hi);

  return true;
}

// Replace instructions from the old group OG with instructions from the
// new group NG. Conceptually, remove all instructions in OG, and then
// insert all instructions in NG, starting at where the first instruction
// from OG was (in the order in which they appeared in the basic block).
// (The ordering in OG does not have to match the order in the basic block.)
bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) {
  LLVM_DEBUG({
    dbgs() << "Replacing:\n";
    for (auto I : OG)
      dbgs() << " " << *I;
    dbgs() << "with\n";
    for (auto I : NG)
      dbgs() << " " << *I;
  });

  MachineBasicBlock *MBB = OG.back()->getParent();
  MachineBasicBlock::iterator InsertAt = MBB->end();

  // Need to establish the insertion point.
  // For loads the best one is right before the first load in the OG,
  // but in the order in which the insts occur in the program list.
  // For stores the best point is right after the last store in the OG.
  // Since the ordering in OG does not correspond
  // to the order in the program list, we need to do some work to find
  // the insertion point.
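  // For example, if the program order is  st A; st C; st B  and OG was sorted
  // by offset into {A, B, C}, the widened stores are inserted after "st B"
  // (the last OG member in program order), which is not OG.back().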

  // Create a set of all instructions in OG (for quick lookup).
  InstrSet OldMemInsts;
  for (auto *I : OG)
    OldMemInsts.insert(I);

  if (Mode == WideningMode::Load) {
    // Find the first load instruction in the block that is present in OG.
    for (auto &I : *MBB) {
      if (OldMemInsts.count(&I)) {
        InsertAt = I;
        break;
      }
    }

    assert((InsertAt != MBB->end()) && "Cannot locate any load from the group");

    for (auto *I : NG)
      MBB->insert(InsertAt, I);
  } else {
    // Find the last store instruction in the block that is present in OG.
    auto I = MBB->rbegin();
    for (; I != MBB->rend(); ++I) {
      if (OldMemInsts.count(&(*I))) {
        InsertAt = (*I).getIterator();
        break;
      }
    }

    assert((I != MBB->rend()) && "Cannot locate any store from the group");

    for (auto I = NG.rbegin(); I != NG.rend(); ++I)
      MBB->insertAfter(InsertAt, *I);
  }

  for (auto *I : OG)
    I->eraseFromParent();

  return true;
}

// Break up the group into smaller groups, each of which can be replaced by
// a single wide load/store. Widen each such smaller group and replace the old
// instructions with the widened ones.
bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) {
  bool Changed = false;
  InstrGroup::iterator I = Group.begin(), E = Group.end();
  InstrGroup OG, NG; // Old and new groups.
  unsigned CollectedSize;

  while (I != E) {
    OG.clear();
    NG.clear();

    bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) &&
                createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG);
    if (!Succ)
      continue;

    assert(OG.size() > 1 && "Created invalid group");
    assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements");
    I += OG.size() - 1;

    Changed = true;
  }

  return Changed;
}

// Process a single basic block: create the load/store groups, and replace
// them with the widened insts, if possible. Processing of each basic block
// is independent from processing of any other basic block. This
// transformation could be stopped after having processed any basic block
// without any ill effects (other than not having performed widening in the
// unprocessed blocks). Also, the basic blocks can be processed in any order.
bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
  InstrGroupList SGs;
  bool Changed = false;

  // To prevent long compile times, do not analyze blocks larger than the
  // limit set by -max-bb-size-for-load-store-widening.
  if (MBB.size() > MaxMBBSizeForLoadStoreWidening)
    return false;

  createGroups(MBB, SGs);

  auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool {
    return getOffset(A) < getOffset(B);
  };
  for (auto &G : SGs) {
    assert(G.size() > 1 && "Group with fewer than 2 elements");
    llvm::sort(G, Less);

    Changed |= processGroup(G);
  }

  return Changed;
}

bool HexagonLoadStoreWidening::run() {
  bool Changed = false;

  for (auto &B : *MF)
    Changed |= processBasicBlock(B);

  return Changed;
}

FunctionPass *llvm::createHexagonStoreWidening() {
  return new HexagonStoreWidening();
}

FunctionPass *llvm::createHexagonLoadWidening() {
  return new HexagonLoadWidening();
}