LLVM 23.0.0git
AArch64A57FPLoadBalancing.cpp
Go to the documentation of this file.
1//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// For best-case performance on Cortex-A57, we should try to use a balanced
9// mix of odd and even D-registers when performing a critical sequence of
10// independent, non-quadword FP/ASIMD floating-point multiply or
11// multiply-accumulate operations.
12//
13// This pass attempts to detect situations where the register allocation may
14// adversely affect this load balancing and to change the registers used so as
15// to better utilize the CPU.
16//
17// Ideally we'd just take each multiply or multiply-accumulate in turn and
18// allocate it alternating even or odd registers. However, multiply-accumulates
19// are most efficiently performed in the same functional unit as their
20// accumulation operand. Therefore this pass tries to find maximal sequences
21// ("Chains") of multiply-accumulates linked via their accumulation operand,
22// and assign them all the same "color" (oddness/evenness).
23//
24// This optimization affects S-register and D-register floating point
25// multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
26// FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
27// not affected.
28//===----------------------------------------------------------------------===//
29
30#include "AArch64.h"
31#include "AArch64InstrInfo.h"
32#include "AArch64Subtarget.h"
42#include "llvm/Support/Debug.h"
44using namespace llvm;
45
46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
47
48// Enforce the algorithm to use the scavenged register even when the original
49// destination register is the correct color. Used for testing.
50static cl::opt<bool>
51TransformAll("aarch64-a57-fp-load-balancing-force-all",
52 cl::desc("Always modify dest registers regardless of color"),
53 cl::init(false), cl::Hidden);
54
55// Never use the balance information obtained from chains - return a specific
56// color always. Used for testing.
58OverrideBalance("aarch64-a57-fp-load-balancing-override",
59 cl::desc("Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
62
63//===----------------------------------------------------------------------===//
64// Helper functions
65
66// Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
67static bool isMul(MachineInstr *MI) {
68 switch (MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
73 return true;
74 default:
75 return false;
76 }
77}
78
79// Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
80static bool isMla(MachineInstr *MI) {
81 switch (MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
90 return true;
91 default:
92 return false;
93 }
94}
95
96//===----------------------------------------------------------------------===//
97
98namespace {
99/// A "color", which is either even or odd. Yes, these aren't really colors
100/// but the algorithm is conceptually doing two-color graph coloring.
101enum class Color { Even, Odd };
102#ifndef NDEBUG
103static const char *ColorNames[2] = { "Even", "Odd" };
104#endif
105
106class Chain;
107
108class AArch64A57FPLoadBalancingImpl {
109public:
110 bool run(MachineFunction &MF);
111
112private:
113 MachineRegisterInfo *MRI;
114 const TargetRegisterInfo *TRI;
115 RegisterClassInfo RCI;
116
117 bool runOnBasicBlock(MachineBasicBlock &MBB);
118 bool colorChainSet(std::vector<Chain *> GV, MachineBasicBlock &MBB,
119 int &Balance);
120 bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
121 int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
122 void scanInstruction(MachineInstr *MI, unsigned Idx,
123 std::map<unsigned, Chain *> &Active,
124 std::vector<std::unique_ptr<Chain>> &AllChains);
125 void maybeKillChain(MachineOperand &MO, unsigned Idx,
126 std::map<unsigned, Chain *> &RegChains);
127 Color getColor(unsigned Register);
128 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain *> &L);
129};
130
131class AArch64A57FPLoadBalancingLegacy : public MachineFunctionPass {
132public:
133 static char ID;
134 explicit AArch64A57FPLoadBalancingLegacy() : MachineFunctionPass(ID) {}
135
136 bool runOnMachineFunction(MachineFunction &MF) override;
137
138 MachineFunctionProperties getRequiredProperties() const override {
139 return MachineFunctionProperties().setNoVRegs();
140 }
141
142 StringRef getPassName() const override {
143 return "A57 FP Anti-dependency breaker";
144 }
145
146 void getAnalysisUsage(AnalysisUsage &AU) const override {
147 AU.setPreservesCFG();
149 }
150};
151}
152
153char AArch64A57FPLoadBalancingLegacy::ID = 0;
154
155INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancingLegacy, DEBUG_TYPE,
156 "AArch64 A57 FP Load-Balancing", false, false)
157INITIALIZE_PASS_END(AArch64A57FPLoadBalancingLegacy, DEBUG_TYPE,
158 "AArch64 A57 FP Load-Balancing", false, false)
159
160namespace {
161/// A Chain is a sequence of instructions that are linked together by
162/// an accumulation operand. For example:
163///
164/// fmul def d0, ?
165/// fmla def d1, ?, ?, killed d0
166/// fmla def d2, ?, ?, killed d1
167///
168/// There may be other instructions interleaved in the sequence that
169/// do not belong to the chain. These other instructions must not use
170/// the "chain" register at any point.
171///
172/// We currently only support chains where the "chain" operand is killed
173/// at each link in the chain for simplicity.
174/// A chain has three important instructions - Start, Last and Kill.
175/// * The start instruction is the first instruction in the chain.
176/// * Last is the final instruction in the chain.
177/// * Kill may or may not be defined. If defined, Kill is the instruction
178/// where the outgoing value of the Last instruction is killed.
179/// This information is important as if we know the outgoing value is
180/// killed with no intervening uses, we can safely change its register.
181///
182/// Without a kill instruction, we must assume the outgoing value escapes
183/// beyond our model and either must not change its register or must
184/// create a fixup FMOV to keep the old register value consistent.
185///
186class Chain {
187public:
188 /// The important (marker) instructions.
190 /// The index, from the start of the basic block, that each marker
191 /// appears. These are stored so we can do quick interval tests.
193 /// All instructions in the chain.
194 std::set<MachineInstr*> Insts;
195 /// True if KillInst cannot be modified. If this is true,
196 /// we cannot change LastInst's outgoing register.
197 /// This will be true for tied values and regmasks.
199 /// The "color" of LastInst. This will be the preferred chain color,
200 /// as changing intermediate nodes is easy but changing the last
201 /// instruction can be more tricky.
203
204 Chain(MachineInstr *MI, unsigned Idx, Color C)
205 : StartInst(MI), LastInst(MI), KillInst(nullptr),
206 StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
207 LastColor(C) {
208 Insts.insert(MI);
209 }
210
211 /// Add a new instruction into the chain. The instruction's dest operand
212 /// has the given color.
213 void add(MachineInstr *MI, unsigned Idx, Color C) {
214 LastInst = MI;
215 LastInstIdx = Idx;
216 LastColor = C;
218 "Chain: broken invariant. A Chain can only be killed after its last "
219 "def");
220
221 Insts.insert(MI);
222 }
223
224 /// Return true if MI is a member of the chain.
225 bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
226
227 /// Return the number of instructions in the chain.
228 unsigned size() const {
229 return Insts.size();
230 }
231
232 /// Inform the chain that its last active register (the dest register of
233 /// LastInst) is killed by MI with no intervening uses or defs.
234 void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
235 KillInst = MI;
236 KillInstIdx = Idx;
237 KillIsImmutable = Immutable;
239 "Chain: broken invariant. A Chain can only be killed after its last "
240 "def");
241 }
242
243 /// Return the first instruction in the chain.
244 MachineInstr *getStart() const { return StartInst; }
245 /// Return the last instruction in the chain.
246 MachineInstr *getLast() const { return LastInst; }
247 /// Return the "kill" instruction (as set with setKill()) or NULL.
248 MachineInstr *getKill() const { return KillInst; }
249 /// Return an instruction that can be used as an iterator for the end
250 /// of the chain. This is the maximum of KillInst (if set) and LastInst.
255
256 /// Can the Kill instruction (assuming one exists) be modified?
257 bool isKillImmutable() const { return KillIsImmutable; }
258
259 /// Return the preferred color of this chain.
261 if (OverrideBalance != 0)
262 return OverrideBalance == 1 ? Color::Even : Color::Odd;
263 return LastColor;
264 }
265
266 /// Return true if this chain (StartInst..KillInst) overlaps with Other.
267 bool rangeOverlapsWith(const Chain &Other) const {
268 unsigned End = KillInst ? KillInstIdx : LastInstIdx;
269 unsigned OtherEnd = Other.KillInst ?
270 Other.KillInstIdx : Other.LastInstIdx;
271
272 return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
273 }
274
275 /// Return true if this chain starts before Other.
276 bool startsBefore(const Chain *Other) const {
277 return StartInstIdx < Other->StartInstIdx;
278 }
279
280 /// Return true if the group will require a fixup MOV at the end.
281 bool requiresFixup() const {
282 return (getKill() && isKillImmutable()) || !getKill();
283 }
284
285 /// Return a simple string representation of the chain.
286 std::string str() const {
287 std::string S;
288 raw_string_ostream OS(S);
289
290 OS << "{";
291 StartInst->print(OS, /* SkipOpers= */true);
292 OS << " -> ";
293 LastInst->print(OS, /* SkipOpers= */true);
294 if (KillInst) {
295 OS << " (kill @ ";
296 KillInst->print(OS, /* SkipOpers= */true);
297 OS << ")";
298 }
299 OS << "}";
300
301 return OS.str();
302 }
303
304};
305
306} // end anonymous namespace
307
308//===----------------------------------------------------------------------===//
309
310bool AArch64A57FPLoadBalancingImpl::run(MachineFunction &MF) {
311 if (!MF.getSubtarget<AArch64Subtarget>().balanceFPOps())
312 return false;
313
314 bool Changed = false;
315 LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
316
317 MRI = &MF.getRegInfo();
319 RCI.runOnMachineFunction(MF);
320
321 for (auto &MBB : MF) {
323 }
324
325 return Changed;
326}
327
328bool AArch64A57FPLoadBalancingLegacy::runOnMachineFunction(
329 MachineFunction &MF) {
330 if (skipFunction(MF.getFunction()))
331 return false;
332 return AArch64A57FPLoadBalancingImpl().run(MF);
333}
334
335PreservedAnalyses
338 if (AArch64A57FPLoadBalancingImpl().run(MF)) {
341 return PA;
342 }
343 return PreservedAnalyses::all();
344}
345
346bool AArch64A57FPLoadBalancingImpl::runOnBasicBlock(MachineBasicBlock &MBB) {
347 bool Changed = false;
348 LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB
349 << " - scanning instructions...\n");
350
351 // First, scan the basic block producing a set of chains.
352
353 // The currently "active" chains - chains that can be added to and haven't
354 // been killed yet. This is keyed by register - all chains can only have one
355 // "link" register between each inst in the chain.
356 std::map<unsigned, Chain*> ActiveChains;
357 std::vector<std::unique_ptr<Chain>> AllChains;
358 unsigned Idx = 0;
359 for (auto &MI : MBB)
360 scanInstruction(&MI, Idx++, ActiveChains, AllChains);
361
362 LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size()
363 << " chains created.\n");
364
365 // Group the chains into disjoint sets based on their liveness range. This is
366 // a poor-man's version of graph coloring. Ideally we'd create an interference
367 // graph and perform full-on graph coloring on that, but;
368 // (a) That's rather heavyweight for only two colors.
369 // (b) We expect multiple disjoint interference regions - in practice the live
370 // range of chains is quite small and they are clustered between loads
371 // and stores.
373 for (auto &I : AllChains)
374 EC.insert(I.get());
375
376 for (auto &I : AllChains)
377 for (auto &J : AllChains)
378 if (I != J && I->rangeOverlapsWith(*J))
379 EC.unionSets(I.get(), J.get());
380 LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
381
382 // Now we assume that every member of an equivalence class interferes
383 // with every other member of that class, and with no members of other classes.
384
385 // Convert the EquivalenceClasses to a simpler set of sets.
386 std::vector<std::vector<Chain*> > V;
387 for (const auto &E : EC) {
388 if (!E->isLeader())
389 continue;
390 std::vector<Chain *> Cs(EC.member_begin(*E), EC.member_end());
391 if (Cs.empty()) continue;
392 V.push_back(std::move(Cs));
393 }
394
395 // Now we have a set of sets, order them by start address so
396 // we can iterate over them sequentially.
397 llvm::sort(V,
398 [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
399 return A.front()->startsBefore(B.front());
400 });
401
402 // As we only have two colors, we can track the global (BB-level) balance of
403 // odds versus evens. We aim to keep this near zero to keep both execution
404 // units fed.
405 // Positive means we're even-heavy, negative we're odd-heavy.
406 //
407 // FIXME: If chains have interdependencies, for example:
408 // mul r0, r1, r2
409 // mul r3, r0, r1
410 // We do not model this and may color each one differently, assuming we'll
411 // get ILP when we obviously can't. This hasn't been seen to be a problem
412 // in practice so far, so we simplify the algorithm by ignoring it.
413 int Parity = 0;
414
415 for (auto &I : V)
416 Changed |= colorChainSet(std::move(I), MBB, Parity);
417
418 return Changed;
419}
420
421Chain *AArch64A57FPLoadBalancingImpl::getAndEraseNext(Color PreferredColor,
422 std::vector<Chain *> &L) {
423 if (L.empty())
424 return nullptr;
425
426 // We try and get the best candidate from L to color next, given that our
427 // preferred color is "PreferredColor". L is ordered from larger to smaller
428 // chains. It is beneficial to color the large chains before the small chains,
429 // but if we can't find a chain of the maximum length with the preferred color,
430 // we fuzz the size and look for slightly smaller chains before giving up and
431 // returning a chain that must be recolored.
432
433 // FIXME: Does this need to be configurable?
434 const unsigned SizeFuzz = 1;
435 unsigned MinSize = L.front()->size() - SizeFuzz;
436 for (auto I = L.begin(), E = L.end(); I != E; ++I) {
437 if ((*I)->size() <= MinSize) {
438 // We've gone past the size limit. Return the previous item.
439 Chain *Ch = *--I;
440 L.erase(I);
441 return Ch;
442 }
443
444 if ((*I)->getPreferredColor() == PreferredColor) {
445 Chain *Ch = *I;
446 L.erase(I);
447 return Ch;
448 }
449 }
450
451 // Bailout case - just return the first item.
452 Chain *Ch = L.front();
453 L.erase(L.begin());
454 return Ch;
455}
456
457bool AArch64A57FPLoadBalancingImpl::colorChainSet(std::vector<Chain *> GV,
458 MachineBasicBlock &MBB,
459 int &Parity) {
460 bool Changed = false;
461 LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
462
463 // Sort by descending size order so that we allocate the most important
464 // sets first.
465 // Tie-break equivalent sizes by sorting chains requiring fixups before
466 // those without fixups. The logic here is that we should look at the
467 // chains that we cannot change before we look at those we can,
468 // so the parity counter is updated and we know what color we should
469 // change them to!
470 // Final tie-break with instruction order so pass output is stable (i.e. not
471 // dependent on malloc'd pointer values).
472 llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
473 if (G1->size() != G2->size())
474 return G1->size() > G2->size();
475 if (G1->requiresFixup() != G2->requiresFixup())
476 return G1->requiresFixup() > G2->requiresFixup();
477 // Make sure startsBefore() produces a stable final order.
478 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
479 "Starts before not total order!");
480 return G1->startsBefore(G2);
481 });
482
483 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
484 while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
485 // Start off by assuming we'll color to our own preferred color.
486 Color C = PreferredColor;
487 if (Parity == 0)
488 // But if we really don't care, use the chain's preferred color.
489 C = G->getPreferredColor();
490
491 LLVM_DEBUG(dbgs() << " - Parity=" << Parity
492 << ", Color=" << ColorNames[(int)C] << "\n");
493
494 // If we'll need a fixup FMOV, don't bother. Testing has shown that this
495 // happens infrequently and when it does it has at least a 50% chance of
496 // slowing code down instead of speeding it up.
497 if (G->requiresFixup() && C != G->getPreferredColor()) {
498 C = G->getPreferredColor();
499 LLVM_DEBUG(dbgs() << " - " << G->str()
500 << " - not worthwhile changing; "
501 "color remains "
502 << ColorNames[(int)C] << "\n");
503 }
504
505 Changed |= colorChain(G, C, MBB);
506
507 Parity += (C == Color::Even) ? G->size() : -G->size();
508 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
509 }
510
511 return Changed;
512}
513
514int AArch64A57FPLoadBalancingImpl::scavengeRegister(Chain *G, Color C,
515 MachineBasicBlock &MBB) {
516 // Can we find an appropriate register that is available throughout the life
517 // of the chain? Simulate liveness backwards until the end of the chain.
518 LiveRegUnits Units(*TRI);
519 Units.addLiveOuts(MBB);
521 MachineBasicBlock::iterator ChainEnd = G->end();
522 while (I != ChainEnd) {
523 --I;
524 Units.stepBackward(*I);
525 }
526
527 // Check which register units are alive throughout the chain.
528 MachineBasicBlock::iterator ChainBegin = G->begin();
529 assert(ChainBegin != ChainEnd && "Chain should contain instructions");
530 do {
531 --I;
532 Units.accumulate(*I);
533 } while (I != ChainBegin);
534
535 // Make sure we allocate in-order, to get the cheapest registers first.
536 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
537 auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
538 for (auto Reg : Ord) {
539 if (!Units.available(Reg))
540 continue;
541 if (C == getColor(Reg))
542 return Reg;
543 }
544
545 return -1;
546}
547
548bool AArch64A57FPLoadBalancingImpl::colorChain(Chain *G, Color C,
549 MachineBasicBlock &MBB) {
550 bool Changed = false;
551 LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
552 << ColorNames[(int)C] << ")\n");
553
554 // Try and obtain a free register of the right class. Without a register
555 // to play with we cannot continue.
556 int Reg = scavengeRegister(G, C, MBB);
557 if (Reg == -1) {
558 LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
559 return false;
560 }
561 LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
562
563 std::map<unsigned, unsigned> Substs;
564 for (MachineInstr &I : *G) {
565 if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
566 continue;
567
568 // I is a member of G, or I is a mutable instruction that kills G.
569
570 std::vector<unsigned> ToErase;
571 for (auto &U : I.operands()) {
572 if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
573 Register OrigReg = U.getReg();
574 U.setReg(Substs[OrigReg]);
575 if (U.isKill())
576 // Don't erase straight away, because there may be other operands
577 // that also reference this substitution!
578 ToErase.push_back(OrigReg);
579 } else if (U.isRegMask()) {
580 for (auto J : Substs) {
581 if (U.clobbersPhysReg(J.first))
582 ToErase.push_back(J.first);
583 }
584 }
585 }
586 // Now it's safe to remove the substs identified earlier.
587 for (auto J : ToErase)
588 Substs.erase(J);
589
590 // Only change the def if this isn't the last instruction.
591 if (&I != G->getKill()) {
592 MachineOperand &MO = I.getOperand(0);
593
594 bool Change = TransformAll || getColor(MO.getReg()) != C;
595 if (G->requiresFixup() && &I == G->getLast())
596 Change = false;
597
598 if (Change) {
599 Substs[MO.getReg()] = Reg;
600 MO.setReg(Reg);
601
602 Changed = true;
603 }
604 }
605 }
606 assert(Substs.size() == 0 && "No substitutions should be left active!");
607
608 if (G->getKill()) {
609 LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n");
610 } else {
611 // We didn't have a kill instruction, but we didn't seem to need to change
612 // the destination register anyway.
613 LLVM_DEBUG(dbgs() << " - Destination register not changed.\n");
614 }
615 return Changed;
616}
617
618void AArch64A57FPLoadBalancingImpl::scanInstruction(
619 MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
620 std::vector<std::unique_ptr<Chain>> &AllChains) {
621 // Inspect "MI", updating ActiveChains and AllChains.
622
623 if (isMul(MI)) {
624
625 for (auto &I : MI->uses())
626 maybeKillChain(I, Idx, ActiveChains);
627 for (auto &I : MI->defs())
628 maybeKillChain(I, Idx, ActiveChains);
629
630 // Create a new chain. Multiplies don't require forwarding so can go on any
631 // unit.
632 Register DestReg = MI->getOperand(0).getReg();
633
634 LLVM_DEBUG(dbgs() << "New chain started for register "
635 << printReg(DestReg, TRI) << " at " << *MI);
636
637 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
638 ActiveChains[DestReg] = G.get();
639 AllChains.push_back(std::move(G));
640
641 } else if (isMla(MI)) {
642
643 // It is beneficial to keep MLAs on the same functional unit as their
644 // accumulator operand.
645 Register DestReg = MI->getOperand(0).getReg();
646 Register AccumReg = MI->getOperand(3).getReg();
647
648 maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
649 maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
650 if (DestReg != AccumReg)
651 maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
652
653 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
654 LLVM_DEBUG(dbgs() << "Chain found for accumulator register "
655 << printReg(AccumReg, TRI) << " in MI " << *MI);
656
657 // For simplicity we only chain together sequences of MULs/MLAs where the
658 // accumulator register is killed on each instruction. This means we don't
659 // need to track other uses of the registers we want to rewrite.
660 //
661 // FIXME: We could extend to handle the non-kill cases for more coverage.
662 if (MI->getOperand(3).isKill()) {
663 // Add to chain.
664 LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
665 ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
666 // Handle cases where the destination is not the same as the accumulator.
667 if (DestReg != AccumReg) {
668 ActiveChains[DestReg] = ActiveChains[AccumReg];
669 ActiveChains.erase(AccumReg);
670 }
671 return;
672 }
673
675 dbgs() << "Cannot add to chain because accumulator operand wasn't "
676 << "marked <kill>!\n");
677 maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
678 }
679
680 LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
681 << printReg(DestReg, TRI) << "\n");
682 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
683 ActiveChains[DestReg] = G.get();
684 AllChains.push_back(std::move(G));
685
686 } else {
687
688 // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
689 // lists.
690 for (auto &I : MI->uses())
691 maybeKillChain(I, Idx, ActiveChains);
692 for (auto &I : MI->defs())
693 maybeKillChain(I, Idx, ActiveChains);
694
695 }
696}
697
698void AArch64A57FPLoadBalancingImpl::maybeKillChain(
699 MachineOperand &MO, unsigned Idx,
700 std::map<unsigned, Chain *> &ActiveChains) {
701 // Given an operand and the set of active chains (keyed by register),
702 // determine if a chain should be ended and remove from ActiveChains.
703 MachineInstr *MI = MO.getParent();
704
705 if (MO.isReg()) {
706
707 // If this is a KILL of a current chain, record it.
708 if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
709 LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
710 << "\n");
711 ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
712 }
713 ActiveChains.erase(MO.getReg());
714
715 } else if (MO.isRegMask()) {
716
717 for (auto I = ActiveChains.begin(), E = ActiveChains.end();
718 I != E;) {
719 if (MO.clobbersPhysReg(I->first)) {
720 LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain "
721 << printReg(I->first, TRI) << "\n");
722 I->second->setKill(MI, Idx, /*Immutable=*/true);
723 ActiveChains.erase(I++);
724 } else
725 ++I;
726 }
727
728 }
729}
730
731Color AArch64A57FPLoadBalancingImpl::getColor(unsigned Reg) {
732 if ((TRI->getEncodingValue(Reg) % 2) == 0)
733 return Color::Even;
734 else
735 return Color::Odd;
736}
737
738// Factory function used by AArch64TargetMachine to add the pass to the passmanager.
740 return new AArch64A57FPLoadBalancingLegacy();
741}
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file declares the machine register scavenger class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterInfo * getTargetRegisterInfo() const
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64A57FPLoadBalancingLegacyPass()
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.