LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64A57FPLoadBalancing.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 131 176 74.4 %
Date: 2018-10-20 13:21:21 Functions: 14 24 58.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : // For best-case performance on Cortex-A57, we should try to use a balanced
      10             : // mix of odd and even D-registers when performing a critical sequence of
      11             : // independent, non-quadword FP/ASIMD floating-point multiply or
      12             : // multiply-accumulate operations.
      13             : //
      14             : // This pass attempts to detect situations where the register allocation may
      15             : // adversely affect this load balancing and to change the registers used so as
      16             : // to better utilize the CPU.
      17             : //
      18             : // Ideally we'd just take each multiply or multiply-accumulate in turn and
      19             : // allocate it alternating even or odd registers. However, multiply-accumulates
      20             : // are most efficiently performed in the same functional unit as their
      21             : // accumulation operand. Therefore this pass tries to find maximal sequences
      22             : // ("Chains") of multiply-accumulates linked via their accumulation operand,
      23             : // and assign them all the same "color" (oddness/evenness).
      24             : //
      25             : // This optimization affects S-register and D-register floating point
      26             : // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
      27             : // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
      28             : // not affected.
      29             : //===----------------------------------------------------------------------===//
      30             : 
      31             : #include "AArch64.h"
      32             : #include "AArch64InstrInfo.h"
      33             : #include "AArch64Subtarget.h"
      34             : #include "llvm/ADT/BitVector.h"
      35             : #include "llvm/ADT/EquivalenceClasses.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineFunctionPass.h"
      38             : #include "llvm/CodeGen/MachineInstr.h"
      39             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      40             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      41             : #include "llvm/CodeGen/RegisterClassInfo.h"
      42             : #include "llvm/CodeGen/RegisterScavenging.h"
      43             : #include "llvm/Support/CommandLine.h"
      44             : #include "llvm/Support/Debug.h"
      45             : #include "llvm/Support/raw_ostream.h"
      46             : using namespace llvm;
      47             : 
      48             : #define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
      49             : 
      50             : // Enforce the algorithm to use the scavenged register even when the original
      51             : // destination register is the correct color. Used for testing.
      52             : static cl::opt<bool>
      53             : TransformAll("aarch64-a57-fp-load-balancing-force-all",
      54             :              cl::desc("Always modify dest registers regardless of color"),
      55             :              cl::init(false), cl::Hidden);
      56             : 
      57             : // Never use the balance information obtained from chains - return a specific
      58             : // color always. Used for testing.
      59             : static cl::opt<unsigned>
      60             : OverrideBalance("aarch64-a57-fp-load-balancing-override",
      61             :               cl::desc("Ignore balance information, always return "
      62             :                        "(1: Even, 2: Odd)."),
      63             :               cl::init(0), cl::Hidden);
      64             : 
      65             : //===----------------------------------------------------------------------===//
      66             : // Helper functions
      67             : 
      68             : // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
      69             : static bool isMul(MachineInstr *MI) {
      70        1770 :   switch (MI->getOpcode()) {
      71             :   case AArch64::FMULSrr:
      72             :   case AArch64::FNMULSrr:
      73             :   case AArch64::FMULDrr:
      74             :   case AArch64::FNMULDrr:
      75             :     return true;
      76             :   default:
      77             :     return false;
      78             :   }
      79             : }
      80             : 
      81             : // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
      82        1693 : static bool isMla(MachineInstr *MI) {
      83        3386 :   switch (MI->getOpcode()) {
      84             :   case AArch64::FMSUBSrrr:
      85             :   case AArch64::FMADDSrrr:
      86             :   case AArch64::FNMSUBSrrr:
      87             :   case AArch64::FNMADDSrrr:
      88             :   case AArch64::FMSUBDrrr:
      89             :   case AArch64::FMADDDrrr:
      90             :   case AArch64::FNMSUBDrrr:
      91             :   case AArch64::FNMADDDrrr:
      92             :     return true;
      93        1489 :   default:
      94        1489 :     return false;
      95             :   }
      96             : }
      97             : 
      98             : //===----------------------------------------------------------------------===//
      99             : 
     100             : namespace {
     101             : /// A "color", which is either even or odd. Yes, these aren't really colors
     102             : /// but the algorithm is conceptually doing two-color graph coloring.
     103             : enum class Color { Even, Odd };
     104             : #ifndef NDEBUG
     105             : static const char *ColorNames[2] = { "Even", "Odd" };
     106             : #endif
     107             : 
     108             : class Chain;
     109             : 
     110             : class AArch64A57FPLoadBalancing : public MachineFunctionPass {
     111             :   MachineRegisterInfo *MRI;
     112             :   const TargetRegisterInfo *TRI;
     113             :   RegisterClassInfo RCI;
     114             : 
     115             : public:
     116             :   static char ID;
     117        1114 :   explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
     118        1114 :     initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
     119        1114 :   }
     120             : 
     121             :   bool runOnMachineFunction(MachineFunction &F) override;
     122             : 
     123        1099 :   MachineFunctionProperties getRequiredProperties() const override {
     124        1099 :     return MachineFunctionProperties().set(
     125        1099 :         MachineFunctionProperties::Property::NoVRegs);
     126             :   }
     127             : 
     128        1106 :   StringRef getPassName() const override {
     129        1106 :     return "A57 FP Anti-dependency breaker";
     130             :   }
     131             : 
     132        1099 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     133        1099 :     AU.setPreservesCFG();
     134        1099 :     MachineFunctionPass::getAnalysisUsage(AU);
     135        1099 :   }
     136             : 
     137             : private:
     138             :   bool runOnBasicBlock(MachineBasicBlock &MBB);
     139             :   bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
     140             :                      int &Balance);
     141             :   bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
     142             :   int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
     143             :   void scanInstruction(MachineInstr *MI, unsigned Idx,
     144             :                        std::map<unsigned, Chain*> &Active,
     145             :                        std::vector<std::unique_ptr<Chain>> &AllChains);
     146             :   void maybeKillChain(MachineOperand &MO, unsigned Idx,
     147             :                       std::map<unsigned, Chain*> &RegChains);
     148             :   Color getColor(unsigned Register);
     149             :   Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
     150             : };
     151             : }
     152             : 
     153             : char AArch64A57FPLoadBalancing::ID = 0;
     154             : 
     155       85109 : INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
     156             :                       "AArch64 A57 FP Load-Balancing", false, false)
     157      200146 : INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
     158             :                     "AArch64 A57 FP Load-Balancing", false, false)
     159             : 
     160             : namespace {
     161             : /// A Chain is a sequence of instructions that are linked together by
     162             : /// an accumulation operand. For example:
     163             : ///
     164             : ///   fmul def d0, ?
     165             : ///   fmla def d1, ?, ?, killed d0
     166             : ///   fmla def d2, ?, ?, killed d1
     167             : ///
     168             : /// There may be other instructions interleaved in the sequence that
     169             : /// do not belong to the chain. These other instructions must not use
     170             : /// the "chain" register at any point.
     171             : ///
     172             : /// We currently only support chains where the "chain" operand is killed
     173             : /// at each link in the chain for simplicity.
     174             : /// A chain has three important instructions - Start, Last and Kill.
     175             : ///   * The start instruction is the first instruction in the chain.
     176             : ///   * Last is the final instruction in the chain.
     177             : ///   * Kill may or may not be defined. If defined, Kill is the instruction
     178             : ///     where the outgoing value of the Last instruction is killed.
     179             : ///     This information is important as if we know the outgoing value is
     180             : ///     killed with no intervening uses, we can safely change its register.
     181             : ///
     182             : /// Without a kill instruction, we must assume the outgoing value escapes
     183             : /// beyond our model and either must not change its register or must
     184             : /// create a fixup FMOV to keep the old register value consistent.
     185             : ///
     186           0 : class Chain {
     187             : public:
     188             :   /// The important (marker) instructions.
     189             :   MachineInstr *StartInst, *LastInst, *KillInst;
     190             :   /// The index, from the start of the basic block, that each marker
     191             :   /// appears. These are stored so we can do quick interval tests.
     192             :   unsigned StartInstIdx, LastInstIdx, KillInstIdx;
     193             :   /// All instructions in the chain.
     194             :   std::set<MachineInstr*> Insts;
     195             :   /// True if KillInst cannot be modified. If this is true,
     196             :   /// we cannot change LastInst's outgoing register.
     197             :   /// This will be true for tied values and regmasks.
     198             :   bool KillIsImmutable;
     199             :   /// The "color" of LastInst. This will be the preferred chain color,
     200             :   /// as changing intermediate nodes is easy but changing the last
     201             :   /// instruction can be more tricky.
     202             :   Color LastColor;
     203             : 
     204             :   Chain(MachineInstr *MI, unsigned Idx, Color C)
     205           0 :       : StartInst(MI), LastInst(MI), KillInst(nullptr),
     206             :         StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
     207           0 :         LastColor(C) {
     208             :     Insts.insert(MI);
     209             :   }
     210             : 
     211             :   /// Add a new instruction into the chain. The instruction's dest operand
     212             :   /// has the given color.
     213             :   void add(MachineInstr *MI, unsigned Idx, Color C) {
     214         162 :     LastInst = MI;
     215         162 :     LastInstIdx = Idx;
     216         162 :     LastColor = C;
     217             :     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
     218             :            "Chain: broken invariant. A Chain can only be killed after its last "
     219             :            "def");
     220             : 
     221             :     Insts.insert(MI);
     222             :   }
     223             : 
     224             :   /// Return true if MI is a member of the chain.
     225         705 :   bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
     226             : 
     227             :   /// Return the number of instructions in the chain.
     228             :   unsigned size() const {
     229          85 :     return Insts.size();
     230             :   }
     231             : 
     232             :   /// Inform the chain that its last active register (the dest register of
     233             :   /// LastInst) is killed by MI with no intervening uses or defs.
     234             :   void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
     235           0 :     KillInst = MI;
     236           0 :     KillInstIdx = Idx;
     237           0 :     KillIsImmutable = Immutable;
     238             :     assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
     239             :            "Chain: broken invariant. A Chain can only be killed after its last "
     240             :            "def");
     241             :   }
     242             : 
     243             :   /// Return the first instruction in the chain.
     244           0 :   MachineInstr *getStart() const { return StartInst; }
     245             :   /// Return the last instruction in the chain.
     246           0 :   MachineInstr *getLast() const { return LastInst; }
     247             :   /// Return the "kill" instruction (as set with setKill()) or NULL.
     248           0 :   MachineInstr *getKill() const { return KillInst; }
     249             :   /// Return an instruction that can be used as an iterator for the end
     250             :   /// of the chain. This is the maximum of KillInst (if set) and LastInst.
     251           0 :   MachineBasicBlock::iterator end() const {
     252           0 :     return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst);
     253             :   }
     254         119 :   MachineBasicBlock::iterator begin() const { return getStart(); }
     255             : 
     256             :   /// Can the Kill instruction (assuming one exists) be modified?
     257           0 :   bool isKillImmutable() const { return KillIsImmutable; }
     258             : 
     259             :   /// Return the preferred color of this chain.
     260           0 :   Color getPreferredColor() {
     261         169 :     if (OverrideBalance != 0)
     262          60 :       return OverrideBalance == 1 ? Color::Even : Color::Odd;
     263         109 :     return LastColor;
     264             :   }
     265             : 
     266             :   /// Return true if this chain (StartInst..KillInst) overlaps with Other.
     267             :   bool rangeOverlapsWith(const Chain &Other) const {
     268          44 :     unsigned End = KillInst ? KillInstIdx : LastInstIdx;
     269          44 :     unsigned OtherEnd = Other.KillInst ?
     270             :       Other.KillInstIdx : Other.LastInstIdx;
     271             : 
     272          44 :     return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
     273             :   }
     274             : 
     275             :   /// Return true if this chain starts before Other.
     276           0 :   bool startsBefore(const Chain *Other) const {
     277           0 :     return StartInstIdx < Other->StartInstIdx;
     278             :   }
     279             : 
     280             :   /// Return true if the group will require a fixup MOV at the end.
     281             :   bool requiresFixup() const {
     282         400 :     return (getKill() && isKillImmutable()) || !getKill();
     283             :   }
     284             : 
     285             :   /// Return a simple string representation of the chain.
     286             :   std::string str() const {
     287             :     std::string S;
     288             :     raw_string_ostream OS(S);
     289             : 
     290             :     OS << "{";
     291             :     StartInst->print(OS, /* SkipOpers= */true);
     292             :     OS << " -> ";
     293             :     LastInst->print(OS, /* SkipOpers= */true);
     294             :     if (KillInst) {
     295             :       OS << " (kill @ ";
     296             :       KillInst->print(OS, /* SkipOpers= */true);
     297             :       OS << ")";
     298             :     }
     299             :     OS << "}";
     300             : 
     301             :     return OS.str();
     302             :   }
     303             : 
     304             : };
     305             : 
     306             : } // end anonymous namespace
     307             : 
     308             : //===----------------------------------------------------------------------===//
     309             : 
     310       14080 : bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
     311       14080 :   if (skipFunction(F.getFunction()))
     312             :     return false;
     313             : 
     314       14074 :   if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
     315             :     return false;
     316             : 
     317             :   bool Changed = false;
     318             :   LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
     319             : 
     320         199 :   MRI = &F.getRegInfo();
     321         199 :   TRI = F.getRegInfo().getTargetRegisterInfo();
     322         199 :   RCI.runOnMachineFunction(F);
     323             : 
     324         454 :   for (auto &MBB : F) {
     325         255 :     Changed |= runOnBasicBlock(MBB);
     326             :   }
     327             : 
     328             :   return Changed;
     329             : }
     330             : 
     331         255 : bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
     332             :   bool Changed = false;
     333             :   LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB
     334             :                     << " - scanning instructions...\n");
     335             : 
     336             :   // First, scan the basic block producing a set of chains.
     337             : 
     338             :   // The currently "active" chains - chains that can be added to and haven't
     339             :   // been killed yet. This is keyed by register - all chains can only have one
     340             :   // "link" register between each inst in the chain.
     341             :   std::map<unsigned, Chain*> ActiveChains;
     342         255 :   std::vector<std::unique_ptr<Chain>> AllChains;
     343             :   unsigned Idx = 0;
     344        2025 :   for (auto &MI : MBB)
     345        1770 :     scanInstruction(&MI, Idx++, ActiveChains, AllChains);
     346             : 
     347             :   LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size()
     348             :                     << " chains created.\n");
     349             : 
     350             :   // Group the chains into disjoint sets based on their liveness range. This is
     351             :   // a poor-man's version of graph coloring. Ideally we'd create an interference
     352             :   // graph and perform full-on graph coloring on that, but;
     353             :   //   (a) That's rather heavyweight for only two colors.
     354             :   //   (b) We expect multiple disjoint interference regions - in practice the live
     355             :   //       range of chains is quite small and they are clustered between loads
     356             :   //       and stores.
     357             :   EquivalenceClasses<Chain*> EC;
     358         374 :   for (auto &I : AllChains)
     359             :     EC.insert(I.get());
     360             : 
     361         374 :   for (auto &I : AllChains)
     362         282 :     for (auto &J : AllChains)
     363         163 :       if (I != J && I->rangeOverlapsWith(*J))
     364          64 :         EC.unionSets(I.get(), J.get());
     365             :   LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
     366             : 
     367             :   // Now we assume that every member of an equivalence class interferes
     368             :   // with every other member of that class, and with no members of other classes.
     369             : 
     370             :   // Convert the EquivalenceClasses to a simpler set of sets.
     371         255 :   std::vector<std::vector<Chain*> > V;
     372         374 :   for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
     373             :     std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
     374         119 :     if (Cs.empty()) continue;
     375             :     V.push_back(std::move(Cs));
     376             :   }
     377             : 
     378             :   // Now we have a set of sets, order them by start address so
     379             :   // we can iterate over them sequentially.
     380             :   llvm::sort(V,
     381             :              [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
     382           0 :                return A.front()->startsBefore(B.front());
     383             :              });
     384             : 
     385             :   // As we only have two colors, we can track the global (BB-level) balance of
     386             :   // odds versus evens. We aim to keep this near zero to keep both execution
     387             :   // units fed.
     388             :   // Positive means we're even-heavy, negative we're odd-heavy.
     389             :   //
     390             :   // FIXME: If chains have interdependencies, for example:
     391             :   //   mul r0, r1, r2
     392             :   //   mul r3, r0, r1
     393             :   // We do not model this and may color each one differently, assuming we'll
     394             :   // get ILP when we obviously can't. This hasn't been seen to be a problem
     395             :   // in practice so far, so we simplify the algorithm by ignoring it.
     396         255 :   int Parity = 0;
     397             : 
     398         358 :   for (auto &I : V)
     399         206 :     Changed |= colorChainSet(std::move(I), MBB, Parity);
     400             : 
     401         255 :   return Changed;
     402             : }
     403             : 
     404           0 : Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
     405             :                                                   std::vector<Chain*> &L) {
     406           0 :   if (L.empty())
     407           0 :     return nullptr;
     408             : 
     409             :   // We try and get the best candidate from L to color next, given that our
     410             :   // preferred color is "PreferredColor". L is ordered from larger to smaller
     411             :   // chains. It is beneficial to color the large chains before the small chains,
     412             :   // but if we can't find a chain of the maximum length with the preferred color,
     413             :   // we fuzz the size and look for slightly smaller chains before giving up and
     414             :   // returning a chain that must be recolored.
     415             : 
     416             :   // FIXME: Does this need to be configurable?
     417             :   const unsigned SizeFuzz = 1;
     418           0 :   unsigned MinSize = L.front()->size() - SizeFuzz;
     419           0 :   for (auto I = L.begin(), E = L.end(); I != E; ++I) {
     420           0 :     if ((*I)->size() <= MinSize) {
     421             :       // We've gone past the size limit. Return the previous item.
     422           0 :       Chain *Ch = *--I;
     423             :       L.erase(I);
     424           0 :       return Ch;
     425             :     }
     426             : 
     427           0 :     if ((*I)->getPreferredColor() == PreferredColor) {
     428             :       Chain *Ch = *I;
     429             :       L.erase(I);
     430           0 :       return Ch;
     431             :     }
     432             :   }
     433             : 
     434             :   // Bailout case - just return the first item.
     435             :   Chain *Ch = L.front();
     436             :   L.erase(L.begin());
     437           0 :   return Ch;
     438             : }
     439             : 
     440         103 : bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
     441             :                                               MachineBasicBlock &MBB,
     442             :                                               int &Parity) {
     443             :   bool Changed = false;
     444             :   LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
     445             : 
     446             :   // Sort by descending size order so that we allocate the most important
     447             :   // sets first.
     448             :   // Tie-break equivalent sizes by sorting chains requiring fixups before
     449             :   // those without fixups. The logic here is that we should look at the
     450             :   // chains that we cannot change before we look at those we can,
     451             :   // so the parity counter is updated and we know what color we should
     452             :   // change them to!
     453             :   // Final tie-break with instruction order so pass output is stable (i.e. not
     454             :   // dependent on malloc'd pointer values).
     455             :   llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
     456             :     if (G1->size() != G2->size())
     457             :       return G1->size() > G2->size();
     458             :     if (G1->requiresFixup() != G2->requiresFixup())
     459             :       return G1->requiresFixup() > G2->requiresFixup();
     460             :     // Make sure startsBefore() produces a stable final order.
     461             :     assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
     462             :            "Starts before not total order!");
     463             :     return G1->startsBefore(G2);
     464             :   });
     465             : 
     466         103 :   Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
     467         222 :   while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
     468             :     // Start off by assuming we'll color to our own preferred color.
     469             :     Color C = PreferredColor;
     470         119 :     if (Parity == 0)
     471             :       // But if we really don't care, use the chain's preferred color.
     472             :       C = G->getPreferredColor();
     473             : 
     474             :     LLVM_DEBUG(dbgs() << " - Parity=" << Parity
     475             :                       << ", Color=" << ColorNames[(int)C] << "\n");
     476             : 
     477             :     // If we'll need a fixup FMOV, don't bother. Testing has shown that this
     478             :     // happens infrequently and when it does it has at least a 50% chance of
     479             :     // slowing code down instead of speeding it up.
     480          72 :     if (G->requiresFixup() && C != G->getPreferredColor()) {
     481             :       C = G->getPreferredColor();
     482             :       LLVM_DEBUG(dbgs() << " - " << G->str()
     483             :                         << " - not worthwhile changing; "
     484             :                            "color remains "
     485             :                         << ColorNames[(int)C] << "\n");
     486             :     }
     487             : 
     488         119 :     Changed |= colorChain(G, C, MBB);
     489             : 
     490         119 :     Parity += (C == Color::Even) ? G->size() : -G->size();
     491         119 :     PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
     492             :   }
     493             : 
     494         103 :   return Changed;
     495             : }
     496             : 
     497         119 : int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
     498             :                                                 MachineBasicBlock &MBB) {
     499             :   // Can we find an appropriate register that is available throughout the life
     500             :   // of the chain? Simulate liveness backwards until the end of the chain.
     501         119 :   LiveRegUnits Units(*TRI);
     502         119 :   Units.addLiveOuts(MBB);
     503         119 :   MachineBasicBlock::iterator I = MBB.end();
     504         119 :   MachineBasicBlock::iterator ChainEnd = G->end();
     505         363 :   while (I != ChainEnd) {
     506             :     --I;
     507         244 :     Units.stepBackward(*I);
     508             :   }
     509             : 
     510             :   // Check which register units are alive throughout the chain.
     511             :   MachineBasicBlock::iterator ChainBegin = G->begin();
     512             :   assert(ChainBegin != ChainEnd && "Chain should contain instructions");
     513             :   do {
     514             :     --I;
     515         424 :     Units.accumulate(*I);
     516         424 :   } while (I != ChainBegin);
     517             : 
     518             :   // Make sure we allocate in-order, to get the cheapest registers first.
     519         119 :   unsigned RegClassID = ChainBegin->getDesc().OpInfo[0].RegClass;
     520         238 :   auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
     521         701 :   for (auto Reg : Ord) {
     522         701 :     if (!Units.available(Reg))
     523             :       continue;
     524         204 :     if (C == getColor(Reg))
     525         119 :       return Reg;
     526             :   }
     527             : 
     528             :   return -1;
     529             : }
     530             : 
     531         119 : bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
     532             :                                            MachineBasicBlock &MBB) {
     533             :   bool Changed = false;
     534             :   LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
     535             :                     << ColorNames[(int)C] << ")\n");
     536             : 
     537             :   // Try and obtain a free register of the right class. Without a register
     538             :   // to play with we cannot continue.
     539         119 :   int Reg = scavengeRegister(G, C, MBB);
     540         119 :   if (Reg == -1) {
     541             :     LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
     542             :     return false;
     543             :   }
     544             :   LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
     545             : 
     546             :   std::map<unsigned, unsigned> Substs;
     547         662 :   for (MachineInstr &I : *G) {
     548         143 :     if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
     549          96 :       continue;
     550             : 
     551             :     // I is a member of G, or I is a mutable instruction that kills G.
     552             : 
     553             :     std::vector<unsigned> ToErase;
     554        1515 :     for (auto &U : I.operands()) {
     555        2046 :       if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
     556         210 :         unsigned OrigReg = U.getReg();
     557         210 :         U.setReg(Substs[OrigReg]);
     558         210 :         if (U.isKill())
     559             :           // Don't erase straight away, because there may be other operands
     560             :           // that also reference this substitution!
     561         204 :           ToErase.push_back(OrigReg);
     562         977 :       } else if (U.isRegMask()) {
     563           0 :         for (auto J : Substs) {
     564           0 :           if (U.clobbersPhysReg(J.first))
     565           0 :             ToErase.push_back(J.first);
     566             :         }
     567             :       }
     568             :     }
     569             :     // Now it's safe to remove the substs identified earlier.
     570         532 :     for (auto J : ToErase)
     571             :       Substs.erase(J);
     572             : 
     573             :     // Only change the def if this isn't the last instruction.
     574         328 :     if (&I != G->getKill()) {
     575         281 :       MachineOperand &MO = I.getOperand(0);
     576             : 
     577         340 :       bool Change = TransformAll || getColor(MO.getReg()) != C;
     578         126 :       if (G->requiresFixup() && &I == G->getLast())
     579             :         Change = false;
     580             : 
     581         209 :       if (Change) {
     582         204 :         Substs[MO.getReg()] = Reg;
     583         204 :         MO.setReg(Reg);
     584             : 
     585             :         Changed = true;
     586             :       }
     587             :     }
     588             :   }
     589             :   assert(Substs.size() == 0 && "No substitutions should be left active!");
     590             : 
     591             :   if (G->getKill()) {
     592             :     LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n");
     593             :   } else {
     594             :     // We didn't have a kill instruction, but we didn't seem to need to change
     595             :     // the destination register anyway.
     596             :     LLVM_DEBUG(dbgs() << " - Destination register not changed.\n");
     597             :   }
     598             :   return Changed;
     599             : }
     600             : 
     601        1770 : void AArch64A57FPLoadBalancing::scanInstruction(
     602             :     MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
     603             :     std::vector<std::unique_ptr<Chain>> &AllChains) {
     604             :   // Inspect "MI", updating ActiveChains and AllChains.
     605             : 
     606             :   if (isMul(MI)) {
     607             : 
     608         231 :     for (auto &I : MI->uses())
     609         154 :       maybeKillChain(I, Idx, ActiveChains);
     610         154 :     for (auto &I : MI->defs())
     611          77 :       maybeKillChain(I, Idx, ActiveChains);
     612             : 
     613             :     // Create a new chain. Multiplies don't require forwarding so can go on any
     614             :     // unit.
     615          77 :     unsigned DestReg = MI->getOperand(0).getReg();
     616             : 
     617             :     LLVM_DEBUG(dbgs() << "New chain started for register "
     618             :                       << printReg(DestReg, TRI) << " at " << *MI);
     619             : 
     620          77 :     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     621          77 :     ActiveChains[DestReg] = G.get();
     622             :     AllChains.push_back(std::move(G));
     623             : 
     624        1693 :   } else if (isMla(MI)) {
     625             : 
     626             :     // It is beneficial to keep MLAs on the same functional unit as their
     627             :     // accumulator operand.
     628         204 :     unsigned DestReg  = MI->getOperand(0).getReg();
     629         204 :     unsigned AccumReg = MI->getOperand(3).getReg();
     630             : 
     631         204 :     maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
     632         408 :     maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
     633         204 :     if (DestReg != AccumReg)
     634          72 :       maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
     635             : 
     636         204 :     if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
     637             :       LLVM_DEBUG(dbgs() << "Chain found for accumulator register "
     638             :                         << printReg(AccumReg, TRI) << " in MI " << *MI);
     639             : 
     640             :       // For simplicity we only chain together sequences of MULs/MLAs where the
     641             :       // accumulator register is killed on each instruction. This means we don't
     642             :       // need to track other uses of the registers we want to rewrite.
     643             :       //
     644             :       // FIXME: We could extend to handle the non-kill cases for more coverage.
     645         324 :       if (MI->getOperand(3).isKill()) {
     646             :         // Add to chain.
     647             :         LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
     648         162 :         ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
     649             :         // Handle cases where the destination is not the same as the accumulator.
     650         162 :         if (DestReg != AccumReg) {
     651          36 :           ActiveChains[DestReg] = ActiveChains[AccumReg];
     652             :           ActiveChains.erase(AccumReg);
     653             :         }
     654         162 :         return;
     655             :       }
     656             : 
     657             :       LLVM_DEBUG(
     658             :           dbgs() << "Cannot add to chain because accumulator operand wasn't "
     659             :                  << "marked <kill>!\n");
     660           0 :       maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
     661             :     }
     662             : 
     663             :     LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
     664             :                       << printReg(DestReg, TRI) << "\n");
     665          42 :     auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
     666          42 :     ActiveChains[DestReg] = G.get();
     667             :     AllChains.push_back(std::move(G));
     668             : 
     669             :   } else {
     670             : 
     671             :     // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
     672             :     // lists.
     673        4791 :     for (auto &I : MI->uses())
     674        3302 :       maybeKillChain(I, Idx, ActiveChains);
     675        2369 :     for (auto &I : MI->defs())
     676         880 :       maybeKillChain(I, Idx, ActiveChains);
     677             : 
     678             :   }
     679             : }
     680             : 
     681           0 : void AArch64A57FPLoadBalancing::
     682             : maybeKillChain(MachineOperand &MO, unsigned Idx,
     683             :                std::map<unsigned, Chain*> &ActiveChains) {
     684             :   // Given an operand and the set of active chains (keyed by register),
     685             :   // determine if a chain should be ended and remove from ActiveChains.
     686           0 :   MachineInstr *MI = MO.getParent();
     687             : 
     688           0 :   if (MO.isReg()) {
     689             : 
     690             :     // If this is a KILL of a current chain, record it.
     691           0 :     if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
     692             :       LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
     693             :                         << "\n");
     694           0 :       ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
     695             :     }
     696           0 :     ActiveChains.erase(MO.getReg());
     697             : 
     698           0 :   } else if (MO.isRegMask()) {
     699             : 
     700             :     for (auto I = ActiveChains.begin(), E = ActiveChains.end();
     701           0 :          I != E;) {
     702           0 :       if (MO.clobbersPhysReg(I->first)) {
     703             :         LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain "
     704             :                           << printReg(I->first, TRI) << "\n");
     705           0 :         I->second->setKill(MI, Idx, /*Immutable=*/true);
     706             :         ActiveChains.erase(I++);
     707             :       } else
     708             :         ++I;
     709             :     }
     710             : 
     711             :   }
     712           0 : }
     713             : 
     714           0 : Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
     715        1088 :   if ((TRI->getEncodingValue(Reg) % 2) == 0)
     716           0 :     return Color::Even;
     717             :   else
     718           0 :     return Color::Odd;
     719             : }
     720             : 
     721             : // Factory function used by AArch64TargetMachine to add the pass to the passmanager.
     722        1114 : FunctionPass *llvm::createAArch64A57FPLoadBalancing() {
     723        1114 :   return new AArch64A57FPLoadBalancing();
     724             : }

Generated by: LCOV version 1.13