LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64A57FPLoadBalancing.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 207 214 96.7 %
Date: 2017-09-14 15:23:50 Functions: 20 21 95.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : // For best-case performance on Cortex-A57, we should try to use a balanced
      10             : // mix of odd and even D-registers when performing a critical sequence of
      11             : // independent, non-quadword FP/ASIMD floating-point multiply or
      12             : // multiply-accumulate operations.
      13             : //
      14             : // This pass attempts to detect situations where the register allocation may
      15             : // adversely affect this load balancing and to change the registers used so as
      16             : // to better utilize the CPU.
      17             : //
      18             : // Ideally we'd just take each multiply or multiply-accumulate in turn and
      19             : // allocate it alternating even or odd registers. However, multiply-accumulates
      20             : // are most efficiently performed in the same functional unit as their
      21             : // accumulation operand. Therefore this pass tries to find maximal sequences
      22             : // ("Chains") of multiply-accumulates linked via their accumulation operand,
      23             : // and assign them all the same "color" (oddness/evenness).
      24             : //
      25             : // This optimization affects S-register and D-register floating point
      26             : // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
      27             : // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
      28             : // not affected.
      29             : //===----------------------------------------------------------------------===//
      30             : 
      31             : #include "AArch64.h"
      32             : #include "AArch64InstrInfo.h"
      33             : #include "AArch64Subtarget.h"
      34             : #include "llvm/ADT/BitVector.h"
      35             : #include "llvm/ADT/EquivalenceClasses.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineFunctionPass.h"
      38             : #include "llvm/CodeGen/MachineInstr.h"
      39             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      40             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      41             : #include "llvm/CodeGen/RegisterClassInfo.h"
      42             : #include "llvm/CodeGen/RegisterScavenging.h"
      43             : #include "llvm/Support/CommandLine.h"
      44             : #include "llvm/Support/Debug.h"
      45             : #include "llvm/Support/raw_ostream.h"
      46             : using namespace llvm;
      47             : 
      48             : #define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
      49             : 
      50             : // Enforce the algorithm to use the scavenged register even when the original
      51             : // destination register is the correct color. Used for testing.
      52             : static cl::opt<bool>
      53       72306 : TransformAll("aarch64-a57-fp-load-balancing-force-all",
      54      216918 :              cl::desc("Always modify dest registers regardless of color"),
      55      289224 :              cl::init(false), cl::Hidden);
      56             : 
      57             : // Never use the balance information obtained from chains - return a specific
      58             : // color always. Used for testing.
      59             : static cl::opt<unsigned>
      60       72306 : OverrideBalance("aarch64-a57-fp-load-balancing-override",
      61      216918 :               cl::desc("Ignore balance information, always return "
      62             :                        "(1: Even, 2: Odd)."),
      63      289224 :               cl::init(0), cl::Hidden);
      64             : 
      65             : //===----------------------------------------------------------------------===//
      66             : // Helper functions
      67             : 
      68             : // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
      69             : static bool isMul(MachineInstr *MI) {
      70        1713 :   switch (MI->getOpcode()) {
      71             :   case AArch64::FMULSrr:
      72             :   case AArch64::FNMULSrr:
      73             :   case AArch64::FMULDrr:
      74             :   case AArch64::FNMULDrr:
      75             :     return true;
      76             :   default:
      77             :     return false;
      78             :   }
      79             : }
      80             : 
      81             : // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
      82        1636 : static bool isMla(MachineInstr *MI) {
      83        3272 :   switch (MI->getOpcode()) {
      84             :   case AArch64::FMSUBSrrr:
      85             :   case AArch64::FMADDSrrr:
      86             :   case AArch64::FNMSUBSrrr:
      87             :   case AArch64::FNMADDSrrr:
      88             :   case AArch64::FMSUBDrrr:
      89             :   case AArch64::FMADDDrrr:
      90             :   case AArch64::FNMSUBDrrr:
      91             :   case AArch64::FNMADDDrrr:
      92             :     return true;
      93        1432 :   default:
      94        1432 :     return false;
      95             :   }
      96             : }
      97             : 
      98             : //===----------------------------------------------------------------------===//
      99             : 
     100             : namespace {
     101             : /// A "color", which is either even or odd. Yes, these aren't really colors
     102             : /// but the algorithm is conceptually doing two-color graph coloring.
     103             : enum class Color { Even, Odd };
     104             : #ifndef NDEBUG
     105             : static const char *ColorNames[2] = { "Even", "Odd" };
     106             : #endif
     107             : 
     108             : class Chain;
     109             : 
     110         901 : class AArch64A57FPLoadBalancing : public MachineFunctionPass {
     111             :   MachineRegisterInfo *MRI;
     112             :   const TargetRegisterInfo *TRI;
     113             :   RegisterClassInfo RCI;
     114             : 
     115             : public:
     116             :   static char ID;
     117         908 :   explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
     118         908 :     initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
     119         908 :   }
     120             : 
     121             :   bool runOnMachineFunction(MachineFunction &F) override;
     122             : 
     123         899 :   MachineFunctionProperties getRequiredProperties() const override {
     124        2697 :     return MachineFunctionProperties().set(
     125        2697 :         MachineFunctionProperties::Property::NoVRegs);
     126             :   }
     127             : 
     128         905 :   StringRef getPassName() const override {
     129         905 :     return "A57 FP Anti-dependency breaker";
     130             :   }
     131             : 
     132         899 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     133         899 :     AU.setPreservesCFG();
     134         899 :     MachineFunctionPass::getAnalysisUsage(AU);
     135         899 :   }
     136             : 
     137             : private:
     138             :   bool runOnBasicBlock(MachineBasicBlock &MBB);
     139             :   bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
     140             :                      int &Balance);
     141             :   bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
     142             :   int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
     143             :   void scanInstruction(MachineInstr *MI, unsigned Idx,
     144             :                        std::map<unsigned, Chain*> &Active,
     145             :                        std::vector<std::unique_ptr<Chain>> &AllChains);
     146             :   void maybeKillChain(MachineOperand &MO, unsigned Idx,
     147             :                       std::map<unsigned, Chain*> &RegChains);
     148             :   Color getColor(unsigned Register);
     149             :   Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
     150             : };
     151             : }
     152             : 
     153             : char AArch64A57FPLoadBalancing::ID = 0;
     154             : 
     155       53045 : INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
     156             :                       "AArch64 A57 FP Load-Balancing", false, false)
     157      315277 : INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
     158             :                     "AArch64 A57 FP Load-Balancing", false, false)
     159             : 
     160             : namespace {
     161             : /// A Chain is a sequence of instructions that are linked together by
     162             : /// an accumulation operand. For example:
     163             : ///
     164             : ///   fmul d0<def>, ?
     165             : ///   fmla d1<def>, ?, ?, d0<kill>
     166             : ///   fmla d2<def>, ?, ?, d1<kill>
     167             : ///
     168             : /// There may be other instructions interleaved in the sequence that
     169             : /// do not belong to the chain. These other instructions must not use
     170             : /// the "chain" register at any point.
     171             : ///
     172             : /// We currently only support chains where the "chain" operand is killed
     173             : /// at each link in the chain for simplicity.
     174             : /// A chain has three important instructions - Start, Last and Kill.
     175             : ///   * The start instruction is the first instruction in the chain.
     176             : ///   * Last is the final instruction in the chain.
     177             : ///   * Kill may or may not be defined. If defined, Kill is the instruction
     178             : ///     where the outgoing value of the Last instruction is killed.
     179             : ///     This information is important as if we know the outgoing value is
     180             : ///     killed with no intervening uses, we can safely change its register.
     181             : ///
     182             : /// Without a kill instruction, we must assume the outgoing value escapes
     183             : /// beyond our model and either must not change its register or must
     184             : /// create a fixup FMOV to keep the old register value consistent.
     185             : ///
     186         238 : class Chain {
     187             : public:
     188             :   /// The important (marker) instructions.
     189             :   MachineInstr *StartInst, *LastInst, *KillInst;
     190             :   /// The index, from the start of the basic block, that each marker
     191             :   /// appears. These are stored so we can do quick interval tests.
     192             :   unsigned StartInstIdx, LastInstIdx, KillInstIdx;
     193             :   /// All instructions in the chain.
     194             :   std::set<MachineInstr*> Insts;
     195             :   /// True if KillInst cannot be modified. If this is true,
     196             :   /// we cannot change LastInst's outgoing register.
     197             :   /// This will be true for tied values and regmasks.
     198             :   bool KillIsImmutable;
     199             :   /// The "color" of LastInst. This will be the preferred chain color,
     200             :   /// as changing intermediate nodes is easy but changing the last
     201             :   /// instruction can be more tricky.
     202             :   Color LastColor;
     203             : 
     204             :   Chain(MachineInstr *MI, unsigned Idx, Color C)
     205         119 :       : StartInst(MI), LastInst(MI), KillInst(nullptr),
     206             :         StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
     207         238 :         LastColor(C) {
     208         238 :     Insts.insert(MI);
     209             :   }
     210             : 
     211             :   /// Add a new instruction into the chain. The instruction's dest operand
     212             :   /// has the given color.
     213             :   void add(MachineInstr *MI, unsigned Idx, Color C) {
     214         162 :     LastInst = MI;
     215         162 :     LastInstIdx = Idx;
     216         162 :     LastColor = C;
     217             :     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
     218             :            "Chain: broken invariant. A Chain can only be killed after its last "
     219             :            "def");
     220             : 
     221         324 :     Insts.insert(MI);
     222             :   }
     223             : 
     224             :   /// Return true if MI is a member of the chain.
     225         567 :   bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
     226             : 
     227             :   /// Return the number of instructions in the chain.
     228             :   unsigned size() const {
     229         942 :     return Insts.size();
     230             :   }
     231             : 
     232             :   /// Inform the chain that its last active register (the dest register of
     233             :   /// LastInst) is killed by MI with no intervening uses or defs.
     234             :   void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
     235          53 :     KillInst = MI;
     236          53 :     KillInstIdx = Idx;
     237          53 :     KillIsImmutable = Immutable;
     238             :     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
     239             :            "Chain: broken invariant. A Chain can only be killed after its last "
     240             :            "def");
     241             :   }
     242             : 
     243             :   /// Return the first instruction in the chain.
     244             :   MachineInstr *getStart() const { return StartInst; }
     245             :   /// Return the last instruction in the chain.
     246             :   MachineInstr *getLast() const { return LastInst; }
     247             :   /// Return the "kill" instruction (as set with setKill()) or NULL.
     248             :   MachineInstr *getKill() const { return KillInst; }
     249             :   /// Return an instruction that can be used as an iterator for the end
     250             :   /// of the chain. This is the maximum of KillInst (if set) and LastInst.
     251             :   MachineBasicBlock::iterator end() const {
     252         714 :     return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst);
     253             :   }
     254         476 :   MachineBasicBlock::iterator begin() const { return getStart(); }
     255             : 
     256             :   /// Can the Kill instruction (assuming one exists) be modified?
     257             :   bool isKillImmutable() const { return KillIsImmutable; }
     258             : 
     259             :   /// Return the preferred color of this chain.
     260             :   Color getPreferredColor() {
     261         292 :     if (OverrideBalance != 0)
     262         120 :       return OverrideBalance == 1 ? Color::Even : Color::Odd;
     263         172 :     return LastColor;
     264             :   }
     265             : 
     266             :   /// Return true if this chain (StartInst..KillInst) overlaps with Other.
     267             :   bool rangeOverlapsWith(const Chain &Other) const {
     268          44 :     unsigned End = KillInst ? KillInstIdx : LastInstIdx;
     269          44 :     unsigned OtherEnd = Other.KillInst ?
     270             :       Other.KillInstIdx : Other.LastInstIdx;
     271             : 
     272          44 :     return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
     273             :   }
     274             : 
     275             :   /// Return true if this chain starts before Other.
     276             :   bool startsBefore(const Chain *Other) const {
     277           0 :     return StartInstIdx < Other->StartInstIdx;
     278             :   }
     279             : 
     280             :   /// Return true if the group will require a fixup MOV at the end.
     281             :   bool requiresFixup() const {
     282         416 :     return (getKill() && isKillImmutable()) || !getKill();
     283             :   }
     284             : 
     285             :   /// Return a simple string representation of the chain.
     286             :   std::string str() const {
     287             :     std::string S;
     288             :     raw_string_ostream OS(S);
     289             : 
     290             :     OS << "{";
     291             :     StartInst->print(OS, /* SkipOpers= */true);
     292             :     OS << " -> ";
     293             :     LastInst->print(OS, /* SkipOpers= */true);
     294             :     if (KillInst) {
     295             :       OS << " (kill @ ";
     296             :       KillInst->print(OS, /* SkipOpers= */true);
     297             :       OS << ")";
     298             :     }
     299             :     OS << "}";
     300             : 
     301             :     return OS.str();
     302             :   }
     303             : 
     304             : };
     305             : 
     306             : } // end anonymous namespace
     307             : 
     308             : //===----------------------------------------------------------------------===//
     309             : 
     310       10994 : bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
     311       10994 :   if (skipFunction(*F.getFunction()))
     312             :     return false;
     313             : 
     314       10993 :   if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
     315             :     return false;
     316             : 
     317         193 :   bool Changed = false;
     318             :   DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
     319             : 
     320         193 :   MRI = &F.getRegInfo();
     321         386 :   TRI = F.getRegInfo().getTargetRegisterInfo();
     322         193 :   RCI.runOnMachineFunction(F);
     323             : 
     324         828 :   for (auto &MBB : F) {
     325         249 :     Changed |= runOnBasicBlock(MBB);
     326             :   }
     327             : 
     328             :   return Changed;
     329             : }
     330             : 
     331         249 : bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
     332         249 :   bool Changed = false;
     333             :   DEBUG(dbgs() << "Running on MBB: " << MBB << " - scanning instructions...\n");
     334             : 
     335             :   // First, scan the basic block producing a set of chains.
     336             : 
     337             :   // The currently "active" chains - chains that can be added to and haven't
     338             :   // been killed yet. This is keyed by register - all chains can only have one
     339             :   // "link" register between each inst in the chain.
     340         498 :   std::map<unsigned, Chain*> ActiveChains;
     341         498 :   std::vector<std::unique_ptr<Chain>> AllChains;
     342         249 :   unsigned Idx = 0;
     343        4422 :   for (auto &MI : MBB)
     344        1713 :     scanInstruction(&MI, Idx++, ActiveChains, AllChains);
     345             : 
     346             :   DEBUG(dbgs() << "Scan complete, "<< AllChains.size() << " chains created.\n");
     347             : 
     348             :   // Group the chains into disjoint sets based on their liveness range. This is
     349             :   // a poor-man's version of graph coloring. Ideally we'd create an interference
     350             :   // graph and perform full-on graph coloring on that, but;
     351             :   //   (a) That's rather heavyweight for only two colors.
     352             :   //   (b) We expect multiple disjoint interference regions - in practice the live
     353             :   //       range of chains is quite small and they are clustered between loads
     354             :   //       and stores.
     355         498 :   EquivalenceClasses<Chain*> EC;
     356        1115 :   for (auto &I : AllChains)
     357         238 :     EC.insert(I.get());
     358             : 
     359        1115 :   for (auto &I : AllChains)
     360         639 :     for (auto &J : AllChains)
     361         283 :       if (I != J && I->rangeOverlapsWith(*J))
     362          64 :         EC.unionSets(I.get(), J.get());
     363             :   DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
     364             : 
     365             :   // Now we assume that every member of an equivalence class interferes
     366             :   // with every other member of that class, and with no members of other classes.
     367             : 
     368             :   // Convert the EquivalenceClasses to a simpler set of sets.
     369         498 :   std::vector<std::vector<Chain*> > V;
     370         498 :   for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
     371         817 :     std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
     372         135 :     if (Cs.empty()) continue;
     373         206 :     V.push_back(std::move(Cs));
     374             :   }
     375             : 
     376             :   // Now we have a set of sets, order them by start address so
     377             :   // we can iterate over them sequentially.
     378         498 :   std::sort(V.begin(), V.end(),
     379             :             [](const std::vector<Chain*> &A,
     380             :                const std::vector<Chain*> &B) {
     381          14 :       return A.front()->startsBefore(B.front());
     382           7 :     });
     383             : 
     384             :   // As we only have two colors, we can track the global (BB-level) balance of
     385             :   // odds versus evens. We aim to keep this near zero to keep both execution
     386             :   // units fed.
     387             :   // Positive means we're even-heavy, negative we're odd-heavy.
     388             :   //
     389             :   // FIXME: If chains have interdependencies, for example:
     390             :   //   mul r0, r1, r2
     391             :   //   mul r3, r0, r1
     392             :   // We do not model this and may color each one differently, assuming we'll
     393             :   // get ILP when we obviously can't. This hasn't been seen to be a problem
     394             :   // in practice so far, so we simplify the algorithm by ignoring it.
     395         249 :   int Parity = 0;
     396             : 
     397        1099 :   for (auto &I : V)
     398         309 :     Changed |= colorChainSet(std::move(I), MBB, Parity);
     399             : 
     400         249 :   return Changed;
     401             : }
     402             : 
     403         222 : Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
     404             :                                                   std::vector<Chain*> &L) {
     405         222 :   if (L.empty())
     406             :     return nullptr;
     407             : 
     408             :   // We try and get the best candidate from L to color next, given that our
     409             :   // preferred color is "PreferredColor". L is ordered from larger to smaller
     410             :   // chains. It is beneficial to color the large chains before the small chains,
     411             :   // but if we can't find a chain of the maximum length with the preferred color,
     412             :   // we fuzz the size and look for slightly smaller chains before giving up and
     413             :   // returning a chain that must be recolored.
     414             : 
     415             :   // FIXME: Does this need to be configurable?
     416         119 :   const unsigned SizeFuzz = 1;
     417         238 :   unsigned MinSize = L.front()->size() - SizeFuzz;
     418         451 :   for (auto I = L.begin(), E = L.end(); I != E; ++I) {
     419         258 :     if ((*I)->size() <= MinSize) {
     420             :       // We've gone past the size limit. Return the previous item.
     421           6 :       Chain *Ch = *--I;
     422          12 :       L.erase(I);
     423             :       return Ch;
     424             :     }
     425             : 
     426         246 :     if ((*I)->getPreferredColor() == PreferredColor) {
     427          29 :       Chain *Ch = *I;
     428          58 :       L.erase(I);
     429             :       return Ch;
     430             :     }
     431             :   }
     432             : 
     433             :   // Bailout case - just return the first item.
     434          84 :   Chain *Ch = L.front();
     435         252 :   L.erase(L.begin());
     436             :   return Ch;
     437             : }
     438             : 
     439         103 : bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
     440             :                                               MachineBasicBlock &MBB,
     441             :                                               int &Parity) {
     442         103 :   bool Changed = false;
     443             :   DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
     444             : 
     445             :   // Sort by descending size order so that we allocate the most important
     446             :   // sets first.
     447             :   // Tie-break equivalent sizes by sorting chains requiring fixups before
     448             :   // those without fixups. The logic here is that we should look at the
     449             :   // chains that we cannot change before we look at those we can,
     450             :   // so the parity counter is updated and we know what color we should
     451             :   // change them to!
     452             :   // Final tie-break with instruction order so pass output is stable (i.e. not
     453             :   // dependent on malloc'd pointer values).
     454         234 :   std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
     455          56 :       if (G1->size() != G2->size())
     456          48 :         return G1->size() > G2->size();
     457           8 :       if (G1->requiresFixup() != G2->requiresFixup())
     458           8 :         return G1->requiresFixup() > G2->requiresFixup();
     459             :       // Make sure startsBefore() produces a stable final order.
     460             :       assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
     461             :              "Starts before not total order!");
     462           0 :       return G1->startsBefore(G2);
     463             :     });
     464             : 
     465         103 :   Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
     466         222 :   while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
     467             :     // Start off by assuming we'll color to our own preferred color.
     468         119 :     Color C = PreferredColor;
     469         119 :     if (Parity == 0)
     470             :       // But if we really don't care, use the chain's preferred color.
     471          97 :       C = G->getPreferredColor();
     472             : 
     473             :     DEBUG(dbgs() << " - Parity=" << Parity << ", Color="
     474             :           << ColorNames[(int)C] << "\n");
     475             : 
     476             :     // If we'll need a fixup FMOV, don't bother. Testing has shown that this
     477             :     // happens infrequently and when it does it has at least a 50% chance of
     478             :     // slowing code down instead of speeding it up.
     479         144 :     if (G->requiresFixup() && C != G->getPreferredColor()) {
     480           0 :       C = G->getPreferredColor();
     481             :       DEBUG(dbgs() << " - " << G->str() << " - not worthwhile changing; "
     482             :             "color remains " << ColorNames[(int)C] << "\n");
     483             :     }
     484             : 
     485         119 :     Changed |= colorChain(G, C, MBB);
     486             : 
     487         238 :     Parity += (C == Color::Even) ? G->size() : -G->size();
     488         119 :     PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
     489             :   }
     490             : 
     491         103 :   return Changed;
     492             : }
     493             : 
     494         119 : int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
     495             :                                                 MachineBasicBlock &MBB) {
     496             :   // Can we find an appropriate register that is available throughout the life
     497             :   // of the chain? Simulate liveness backwards until the end of the chain.
     498         357 :   LiveRegUnits Units(*TRI);
     499         119 :   Units.addLiveOuts(MBB);
     500         119 :   MachineBasicBlock::iterator I = MBB.end();
     501         238 :   MachineBasicBlock::iterator ChainEnd = G->end();
     502         607 :   while (I != ChainEnd) {
     503         244 :     --I;
     504         244 :     Units.stepBackward(*I);
     505             :   }
     506             : 
     507             :   // Check which register units are alive throughout the chain.
     508         119 :   MachineBasicBlock::iterator ChainBegin = G->begin();
     509             :   assert(ChainBegin != ChainEnd && "Chain should contain instructions");
     510             :   do {
     511         424 :     --I;
     512         424 :     Units.accumulate(*I);
     513         424 :   } while (I != ChainBegin);
     514             : 
     515             :   // Make sure we allocate in-order, to get the cheapest registers first.
     516         119 :   unsigned RegClassID = ChainBegin->getDesc().OpInfo[0].RegClass;
     517         238 :   auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
     518         820 :   for (auto Reg : Ord) {
     519         701 :     if (!Units.available(Reg))
     520         497 :       continue;
     521         408 :     if (C == getColor(Reg))
     522         119 :       return Reg;
     523             :   }
     524             : 
     525             :   return -1;
     526             : }
     527             : 
     528         119 : bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
     529             :                                            MachineBasicBlock &MBB) {
     530         119 :   bool Changed = false;
     531             :   DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
     532             :         << ColorNames[(int)C] << ")\n");
     533             : 
     534             :   // Try and obtain a free register of the right class. Without a register
     535             :   // to play with we cannot continue.
     536         119 :   int Reg = scavengeRegister(G, C, MBB);
     537         119 :   if (Reg == -1) {
     538             :     DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
     539             :     return false;
     540             :   }
     541             :   DEBUG(dbgs() << " - Scavenged register: " << TRI->getName(Reg) << "\n");
     542             : 
     543         119 :   std::map<unsigned, unsigned> Substs;
     544        1324 :   for (MachineInstr &I : *G) {
     545         143 :     if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
     546          96 :       continue;
     547             : 
     548             :     // I is a member of G, or I is a mutable instruction that kills G.
     549             : 
     550         656 :     std::vector<unsigned> ToErase;
     551        1515 :     for (auto &U : I.operands()) {
     552        4056 :       if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
     553         210 :         unsigned OrigReg = U.getReg();
     554         210 :         U.setReg(Substs[OrigReg]);
     555         210 :         if (U.isKill())
     556             :           // Don't erase straight away, because there may be other operands
     557             :           // that also reference this substitution!
     558         204 :           ToErase.push_back(OrigReg);
     559         977 :       } else if (U.isRegMask()) {
     560           0 :         for (auto J : Substs) {
     561           0 :           if (U.clobbersPhysReg(J.first))
     562           0 :             ToErase.push_back(J.first);
     563             :         }
     564             :       }
     565             :     }
     566             :     // Now it's safe to remove the substs identified earlier.
     567        1516 :     for (auto J : ToErase)
     568         204 :       Substs.erase(J);
     569             : 
     570             :     // Only change the def if this isn't the last instruction.
     571         328 :     if (&I != G->getKill()) {
     572         281 :       MachineOperand &MO = I.getOperand(0);
     573             : 
     574         340 :       bool Change = TransformAll || getColor(MO.getReg()) != C;
     575         126 :       if (G->requiresFixup() && &I == G->getLast())
     576             :         Change = false;
     577             : 
     578         209 :       if (Change) {
     579         204 :         Substs[MO.getReg()] = Reg;
     580         204 :         MO.setReg(Reg);
     581             : 
     582         204 :         Changed = true;
     583             :       }
     584             :     }
     585             :   }
     586             :   assert(Substs.size() == 0 && "No substitutions should be left active!");
     587             : 
     588         119 :   if (G->getKill()) {
     589             :     DEBUG(dbgs() << " - Kill instruction seen.\n");
     590             :   } else {
     591             :     // We didn't have a kill instruction, but we didn't seem to need to change
     592             :     // the destination register anyway.
     593             :     DEBUG(dbgs() << " - Destination register not changed.\n");
     594             :   }
     595         119 :   return Changed;
     596             : }
     597             : 
     598        1713 : void AArch64A57FPLoadBalancing::scanInstruction(
     599             :     MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
     600             :     std::vector<std::unique_ptr<Chain>> &AllChains) {
     601             :   // Inspect "MI", updating ActiveChains and AllChains.
     602             : 
     603        3426 :   if (isMul(MI)) {
     604             : 
     605         539 :     for (auto &I : MI->uses())
     606         154 :       maybeKillChain(I, Idx, ActiveChains);
     607         231 :     for (auto &I : MI->defs())
     608          77 :       maybeKillChain(I, Idx, ActiveChains);
     609             : 
     610             :     // Create a new chain. Multiplies don't require forwarding so can go on any
     611             :     // unit.
     612          77 :     unsigned DestReg = MI->getOperand(0).getReg();
     613             : 
     614             :     DEBUG(dbgs() << "New chain started for register "
     615             :           << TRI->getName(DestReg) << " at " << *MI);
     616             : 
     617         231 :     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     618         154 :     ActiveChains[DestReg] = G.get();
     619         154 :     AllChains.push_back(std::move(G));
     620             : 
     621        1636 :   } else if (isMla(MI)) {
     622             : 
     623             :     // It is beneficial to keep MLAs on the same functional unit as their
     624             :     // accumulator operand.
     625         204 :     unsigned DestReg  = MI->getOperand(0).getReg();
     626         204 :     unsigned AccumReg = MI->getOperand(3).getReg();
     627             : 
     628         408 :     maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
     629         408 :     maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
     630         204 :     if (DestReg != AccumReg)
     631          72 :       maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
     632             : 
     633         408 :     if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
     634             :       DEBUG(dbgs() << "Chain found for accumulator register "
     635             :             << TRI->getName(AccumReg) << " in MI " << *MI);
     636             : 
     637             :       // For simplicity we only chain together sequences of MULs/MLAs where the
     638             :       // accumulator register is killed on each instruction. This means we don't
     639             :       // need to track other uses of the registers we want to rewrite.
     640             :       //
     641             :       // FIXME: We could extend to handle the non-kill cases for more coverage.
     642         324 :       if (MI->getOperand(3).isKill()) {
     643             :         // Add to chain.
     644             :         DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
     645         486 :         ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
     646             :         // Handle cases where the destination is not the same as the accumulator.
     647         162 :         if (DestReg != AccumReg) {
     648          36 :           ActiveChains[DestReg] = ActiveChains[AccumReg];
     649             :           ActiveChains.erase(AccumReg);
     650             :         }
     651         162 :         return;
     652             :       }
     653             : 
     654             :       DEBUG(dbgs() << "Cannot add to chain because accumulator operand wasn't "
     655             :             << "marked <kill>!\n");
     656           0 :       maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
     657             :     }
     658             : 
     659             :     DEBUG(dbgs() << "Creating new chain for dest register "
     660             :           << TRI->getName(DestReg) << "\n");
     661         126 :     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     662          84 :     ActiveChains[DestReg] = G.get();
     663          84 :     AllChains.push_back(std::move(G));
     664             : 
     665             :   } else {
     666             : 
     667             :     // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
     668             :     // lists.
     669        6054 :     for (auto &I : MI->uses())
     670        3190 :       maybeKillChain(I, Idx, ActiveChains);
     671        3720 :     for (auto &I : MI->defs())
     672         856 :       maybeKillChain(I, Idx, ActiveChains);
     673             : 
     674             :   }
     675             : }
     676             : 
     677        4757 : void AArch64A57FPLoadBalancing::
     678             : maybeKillChain(MachineOperand &MO, unsigned Idx,
     679             :                std::map<unsigned, Chain*> &ActiveChains) {
     680             :   // Given an operand and the set of active chains (keyed by register),
     681             :   // determine if a chain should be ended and remove from ActiveChains.
     682        4757 :   MachineInstr *MI = MO.getParent();
     683             : 
     684        4757 :   if (MO.isReg()) {
     685             : 
     686             :     // If this is a KILL of a current chain, record it.
     687        5982 :     if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
     688             :       DEBUG(dbgs() << "Kill seen for chain " << TRI->getName(MO.getReg())
     689             :             << "\n");
     690         141 :       ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
     691             :     }
     692        7264 :     ActiveChains.erase(MO.getReg());
     693             : 
     694        1125 :   } else if (MO.isRegMask()) {
     695             : 
     696         120 :     for (auto I = ActiveChains.begin(), E = ActiveChains.end();
     697          66 :          I != E;) {
     698          12 :       if (MO.clobbersPhysReg(I->first)) {
     699             :         DEBUG(dbgs() << "Kill (regmask) seen for chain "
     700             :               << TRI->getName(I->first) << "\n");
     701          12 :         I->second->setKill(MI, Idx, /*Immutable=*/true);
     702          12 :         ActiveChains.erase(I++);
     703             :       } else
     704             :         ++I;
     705             :     }
     706             : 
     707             :   }
     708        4757 : }
     709             : 
     710             : Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
     711        1088 :   if ((TRI->getEncodingValue(Reg) % 2) == 0)
     712             :     return Color::Even;
     713             :   else
     714             :     return Color::Odd;
     715             : }
     716             : 
     717             : // Factory function used by AArch64TargetMachine to add the pass to the passmanager.
     718         908 : FunctionPass *llvm::createAArch64A57FPLoadBalancing() {
     719         908 :   return new AArch64A57FPLoadBalancing();
     720      216918 : }

Generated by: LCOV version 1.13