LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FalkorHWPFFix.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 165 168 98.2 %
Date: 2018-06-17 00:07:59 Functions: 21 22 95.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : /// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
      10             : /// that may inhibit the HW prefetching.  This is done in two steps.  Before
      11             : /// ISel, we mark strided loads (i.e. those that will likely benefit from
      12             : /// prefetching) with metadata.  Then, after opcodes have been finalized, we
      13             : /// insert MOVs and re-write loads to prevent unintentional tag collisions.
      14             : // ===---------------------------------------------------------------------===//
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64Subtarget.h"
      19             : #include "AArch64TargetMachine.h"
      20             : #include "llvm/ADT/DenseMap.h"
      21             : #include "llvm/ADT/DepthFirstIterator.h"
      22             : #include "llvm/ADT/None.h"
      23             : #include "llvm/ADT/Optional.h"
      24             : #include "llvm/ADT/SmallVector.h"
      25             : #include "llvm/ADT/Statistic.h"
      26             : #include "llvm/Analysis/LoopInfo.h"
      27             : #include "llvm/Analysis/ScalarEvolution.h"
      28             : #include "llvm/Analysis/ScalarEvolutionExpressions.h"
      29             : #include "llvm/CodeGen/LiveRegUnits.h"
      30             : #include "llvm/CodeGen/MachineBasicBlock.h"
      31             : #include "llvm/CodeGen/MachineFunction.h"
      32             : #include "llvm/CodeGen/MachineFunctionPass.h"
      33             : #include "llvm/CodeGen/MachineInstr.h"
      34             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      35             : #include "llvm/CodeGen/MachineLoopInfo.h"
      36             : #include "llvm/CodeGen/MachineOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetPassConfig.h"
      39             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      40             : #include "llvm/IR/DebugLoc.h"
      41             : #include "llvm/IR/Dominators.h"
      42             : #include "llvm/IR/Function.h"
      43             : #include "llvm/IR/Instruction.h"
      44             : #include "llvm/IR/Instructions.h"
      45             : #include "llvm/IR/Metadata.h"
      46             : #include "llvm/Pass.h"
      47             : #include "llvm/Support/Casting.h"
      48             : #include "llvm/Support/Debug.h"
      49             : #include "llvm/Support/DebugCounter.h"
      50             : #include "llvm/Support/raw_ostream.h"
      51             : #include <cassert>
      52             : #include <iterator>
      53             : #include <utility>
      54             : 
      55             : using namespace llvm;
      56             : 
      57             : #define DEBUG_TYPE "falkor-hwpf-fix"
      58             : 
      59             : STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
      60             : STATISTIC(NumCollisionsAvoided,
      61             :           "Number of HW prefetch tag collisions avoided");
      62             : STATISTIC(NumCollisionsNotAvoided,
      63             :           "Number of HW prefetch tag collisions not avoided due to lack of registers");
      64      101169 : DEBUG_COUNTER(FixCounter, "falkor-hwpf",
      65             :               "Controls which tag collisions are avoided");
      66             : 
      67             : namespace {
      68             : 
      69             : class FalkorMarkStridedAccesses {
      70             : public:
      71             :   FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
      72          52 :       : LI(LI), SE(SE) {}
      73             : 
      74             :   bool run();
      75             : 
      76             : private:
      77             :   bool runOnLoop(Loop &L);
      78             : 
      79             :   LoopInfo &LI;
      80             :   ScalarEvolution &SE;
      81             : };
      82             : 
      83        2050 : class FalkorMarkStridedAccessesLegacy : public FunctionPass {
      84             : public:
      85             :   static char ID; // Pass ID, replacement for typeid
      86             : 
      87             :   FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
      88        1032 :     initializeFalkorMarkStridedAccessesLegacyPass(
      89        1032 :         *PassRegistry::getPassRegistry());
      90             :   }
      91             : 
      92        1026 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      93             :     AU.addRequired<TargetPassConfig>();
      94             :     AU.addPreserved<DominatorTreeWrapperPass>();
      95             :     AU.addRequired<LoopInfoWrapperPass>();
      96             :     AU.addPreserved<LoopInfoWrapperPass>();
      97             :     AU.addRequired<ScalarEvolutionWrapperPass>();
      98             :     AU.addPreserved<ScalarEvolutionWrapperPass>();
      99        1026 :   }
     100             : 
     101             :   bool runOnFunction(Function &F) override;
     102             : };
     103             : 
     104             : } // end anonymous namespace
     105             : 
     106             : char FalkorMarkStridedAccessesLegacy::ID = 0;
     107             : 
     108       76339 : INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
     109             :                       "Falkor HW Prefetch Fix", false, false)
     110       76339 : INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
     111       76339 : INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
     112       76339 : INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
     113      359160 : INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
     114             :                     "Falkor HW Prefetch Fix", false, false)
     115             : 
     116        1030 : FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
     117        2060 :   return new FalkorMarkStridedAccessesLegacy();
     118             : }
     119             : 
     120       13331 : bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
     121       13331 :   TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
     122             :   const AArch64Subtarget *ST =
     123       13331 :       TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
     124       13331 :   if (ST->getProcFamily() != AArch64Subtarget::Falkor)
     125             :     return false;
     126             : 
     127          52 :   if (skipFunction(F))
     128             :     return false;
     129             : 
     130          52 :   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     131          52 :   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     132             : 
     133             :   FalkorMarkStridedAccesses LDP(LI, SE);
     134          52 :   return LDP.run();
     135             : }
     136             : 
     137          52 : bool FalkorMarkStridedAccesses::run() {
     138             :   bool MadeChange = false;
     139             : 
     140         109 :   for (Loop *L : LI)
     141          16 :     for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
     142           6 :       MadeChange |= runOnLoop(**LIt);
     143             : 
     144          52 :   return MadeChange;
     145             : }
     146             : 
     147           6 : bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
     148             :   // Only mark strided loads in the inner-most loop
     149           6 :   if (!L.empty())
     150             :     return false;
     151             : 
     152             :   bool MadeChange = false;
     153             : 
     154          15 :   for (BasicBlock *BB : L.blocks()) {
     155          70 :     for (Instruction &I : *BB) {
     156             :       LoadInst *LoadI = dyn_cast<LoadInst>(&I);
     157          52 :       if (!LoadI)
     158             :         continue;
     159             : 
     160             :       Value *PtrValue = LoadI->getPointerOperand();
     161          13 :       if (L.isLoopInvariant(PtrValue))
     162             :         continue;
     163             : 
     164          13 :       const SCEV *LSCEV = SE.getSCEV(PtrValue);
     165             :       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
     166          13 :       if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
     167             :         continue;
     168             : 
     169          24 :       LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
     170          12 :                          MDNode::get(LoadI->getContext(), {}));
     171             :       ++NumStridedLoadsMarked;
     172             :       LLVM_DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
     173             :       MadeChange = true;
     174             :     }
     175             :   }
     176             : 
     177             :   return MadeChange;
     178             : }
     179             : 
     180             : namespace {
     181             : 
     182        3072 : class FalkorHWPFFix : public MachineFunctionPass {
     183             : public:
     184             :   static char ID;
     185             : 
     186        1031 :   FalkorHWPFFix() : MachineFunctionPass(ID) {
     187        1031 :     initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
     188        1031 :   }
     189             : 
     190             :   bool runOnMachineFunction(MachineFunction &Fn) override;
     191             : 
     192        1017 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     193        1017 :     AU.setPreservesCFG();
     194             :     AU.addRequired<MachineLoopInfo>();
     195        1017 :     MachineFunctionPass::getAnalysisUsage(AU);
     196        1017 :   }
     197             : 
     198        1017 :   MachineFunctionProperties getRequiredProperties() const override {
     199        2034 :     return MachineFunctionProperties().set(
     200        1017 :         MachineFunctionProperties::Property::NoVRegs);
     201             :   }
     202             : 
     203             : private:
     204             :   void runOnLoop(MachineLoop &L, MachineFunction &Fn);
     205             : 
     206             :   const AArch64InstrInfo *TII;
     207             :   const TargetRegisterInfo *TRI;
     208             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
     209             :   bool Modified;
     210             : };
     211             : 
     212             : /// Bits from load opcodes used to compute HW prefetcher instruction tags.
     213             : struct LoadInfo {
     214             :   LoadInfo() = default;
     215             : 
     216             :   unsigned DestReg = 0;
     217             :   unsigned BaseReg = 0;
     218             :   int BaseRegIdx = -1;
     219             :   const MachineOperand *OffsetOpnd = nullptr;
     220             :   bool IsPrePost = false;
     221             : };
     222             : 
     223             : } // end anonymous namespace
     224             : 
     225             : char FalkorHWPFFix::ID = 0;
     226             : 
     227       76339 : INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late",
     228             :                       "Falkor HW Prefetch Fix Late Phase", false, false)
     229       76339 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     230      359158 : INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late",
     231             :                     "Falkor HW Prefetch Fix Late Phase", false, false)
     232             : 
     233             : static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
     234          76 :   return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
     235             : }
     236             : 
     237         132 : static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
     238             :   int DestRegIdx;
     239             :   int BaseRegIdx;
     240             :   int OffsetIdx;
     241             :   bool IsPrePost;
     242             : 
     243         264 :   switch (MI.getOpcode()) {
     244             :   default:
     245             :     return None;
     246             : 
     247             :   case AArch64::LD1i64:
     248             :   case AArch64::LD2i64:
     249             :     DestRegIdx = 0;
     250             :     BaseRegIdx = 3;
     251             :     OffsetIdx = -1;
     252             :     IsPrePost = false;
     253             :     break;
     254             : 
     255           2 :   case AArch64::LD1i8:
     256             :   case AArch64::LD1i16:
     257             :   case AArch64::LD1i32:
     258             :   case AArch64::LD2i8:
     259             :   case AArch64::LD2i16:
     260             :   case AArch64::LD2i32:
     261             :   case AArch64::LD3i8:
     262             :   case AArch64::LD3i16:
     263             :   case AArch64::LD3i32:
     264             :   case AArch64::LD3i64:
     265             :   case AArch64::LD4i8:
     266             :   case AArch64::LD4i16:
     267             :   case AArch64::LD4i32:
     268             :   case AArch64::LD4i64:
     269             :     DestRegIdx = -1;
     270             :     BaseRegIdx = 3;
     271             :     OffsetIdx = -1;
     272             :     IsPrePost = false;
     273           2 :     break;
     274             : 
     275           2 :   case AArch64::LD1Onev1d:
     276             :   case AArch64::LD1Onev2s:
     277             :   case AArch64::LD1Onev4h:
     278             :   case AArch64::LD1Onev8b:
     279             :   case AArch64::LD1Onev2d:
     280             :   case AArch64::LD1Onev4s:
     281             :   case AArch64::LD1Onev8h:
     282             :   case AArch64::LD1Onev16b:
     283             :   case AArch64::LD1Rv1d:
     284             :   case AArch64::LD1Rv2s:
     285             :   case AArch64::LD1Rv4h:
     286             :   case AArch64::LD1Rv8b:
     287             :   case AArch64::LD1Rv2d:
     288             :   case AArch64::LD1Rv4s:
     289             :   case AArch64::LD1Rv8h:
     290             :   case AArch64::LD1Rv16b:
     291             :     DestRegIdx = 0;
     292             :     BaseRegIdx = 1;
     293             :     OffsetIdx = -1;
     294             :     IsPrePost = false;
     295           2 :     break;
     296             : 
     297           2 :   case AArch64::LD1Twov1d:
     298             :   case AArch64::LD1Twov2s:
     299             :   case AArch64::LD1Twov4h:
     300             :   case AArch64::LD1Twov8b:
     301             :   case AArch64::LD1Twov2d:
     302             :   case AArch64::LD1Twov4s:
     303             :   case AArch64::LD1Twov8h:
     304             :   case AArch64::LD1Twov16b:
     305             :   case AArch64::LD1Threev1d:
     306             :   case AArch64::LD1Threev2s:
     307             :   case AArch64::LD1Threev4h:
     308             :   case AArch64::LD1Threev8b:
     309             :   case AArch64::LD1Threev2d:
     310             :   case AArch64::LD1Threev4s:
     311             :   case AArch64::LD1Threev8h:
     312             :   case AArch64::LD1Threev16b:
     313             :   case AArch64::LD1Fourv1d:
     314             :   case AArch64::LD1Fourv2s:
     315             :   case AArch64::LD1Fourv4h:
     316             :   case AArch64::LD1Fourv8b:
     317             :   case AArch64::LD1Fourv2d:
     318             :   case AArch64::LD1Fourv4s:
     319             :   case AArch64::LD1Fourv8h:
     320             :   case AArch64::LD1Fourv16b:
     321             :   case AArch64::LD2Twov2s:
     322             :   case AArch64::LD2Twov4s:
     323             :   case AArch64::LD2Twov8b:
     324             :   case AArch64::LD2Twov2d:
     325             :   case AArch64::LD2Twov4h:
     326             :   case AArch64::LD2Twov8h:
     327             :   case AArch64::LD2Twov16b:
     328             :   case AArch64::LD2Rv1d:
     329             :   case AArch64::LD2Rv2s:
     330             :   case AArch64::LD2Rv4s:
     331             :   case AArch64::LD2Rv8b:
     332             :   case AArch64::LD2Rv2d:
     333             :   case AArch64::LD2Rv4h:
     334             :   case AArch64::LD2Rv8h:
     335             :   case AArch64::LD2Rv16b:
     336             :   case AArch64::LD3Threev2s:
     337             :   case AArch64::LD3Threev4h:
     338             :   case AArch64::LD3Threev8b:
     339             :   case AArch64::LD3Threev2d:
     340             :   case AArch64::LD3Threev4s:
     341             :   case AArch64::LD3Threev8h:
     342             :   case AArch64::LD3Threev16b:
     343             :   case AArch64::LD3Rv1d:
     344             :   case AArch64::LD3Rv2s:
     345             :   case AArch64::LD3Rv4h:
     346             :   case AArch64::LD3Rv8b:
     347             :   case AArch64::LD3Rv2d:
     348             :   case AArch64::LD3Rv4s:
     349             :   case AArch64::LD3Rv8h:
     350             :   case AArch64::LD3Rv16b:
     351             :   case AArch64::LD4Fourv2s:
     352             :   case AArch64::LD4Fourv4h:
     353             :   case AArch64::LD4Fourv8b:
     354             :   case AArch64::LD4Fourv2d:
     355             :   case AArch64::LD4Fourv4s:
     356             :   case AArch64::LD4Fourv8h:
     357             :   case AArch64::LD4Fourv16b:
     358             :   case AArch64::LD4Rv1d:
     359             :   case AArch64::LD4Rv2s:
     360             :   case AArch64::LD4Rv4h:
     361             :   case AArch64::LD4Rv8b:
     362             :   case AArch64::LD4Rv2d:
     363             :   case AArch64::LD4Rv4s:
     364             :   case AArch64::LD4Rv8h:
     365             :   case AArch64::LD4Rv16b:
     366             :     DestRegIdx = -1;
     367             :     BaseRegIdx = 1;
     368             :     OffsetIdx = -1;
     369             :     IsPrePost = false;
     370           2 :     break;
     371             : 
     372           2 :   case AArch64::LD1i64_POST:
     373             :   case AArch64::LD2i64_POST:
     374             :     DestRegIdx = 1;
     375             :     BaseRegIdx = 4;
     376             :     OffsetIdx = 5;
     377             :     IsPrePost = true;
     378           2 :     break;
     379             : 
     380           2 :   case AArch64::LD1i8_POST:
     381             :   case AArch64::LD1i16_POST:
     382             :   case AArch64::LD1i32_POST:
     383             :   case AArch64::LD2i8_POST:
     384             :   case AArch64::LD2i16_POST:
     385             :   case AArch64::LD2i32_POST:
     386             :   case AArch64::LD3i8_POST:
     387             :   case AArch64::LD3i16_POST:
     388             :   case AArch64::LD3i32_POST:
     389             :   case AArch64::LD3i64_POST:
     390             :   case AArch64::LD4i8_POST:
     391             :   case AArch64::LD4i16_POST:
     392             :   case AArch64::LD4i32_POST:
     393             :   case AArch64::LD4i64_POST:
     394             :     DestRegIdx = -1;
     395             :     BaseRegIdx = 4;
     396             :     OffsetIdx = 5;
     397             :     IsPrePost = true;
     398           2 :     break;
     399             : 
     400           2 :   case AArch64::LD1Onev1d_POST:
     401             :   case AArch64::LD1Onev2s_POST:
     402             :   case AArch64::LD1Onev4h_POST:
     403             :   case AArch64::LD1Onev8b_POST:
     404             :   case AArch64::LD1Onev2d_POST:
     405             :   case AArch64::LD1Onev4s_POST:
     406             :   case AArch64::LD1Onev8h_POST:
     407             :   case AArch64::LD1Onev16b_POST:
     408             :   case AArch64::LD1Rv1d_POST:
     409             :   case AArch64::LD1Rv2s_POST:
     410             :   case AArch64::LD1Rv4h_POST:
     411             :   case AArch64::LD1Rv8b_POST:
     412             :   case AArch64::LD1Rv2d_POST:
     413             :   case AArch64::LD1Rv4s_POST:
     414             :   case AArch64::LD1Rv8h_POST:
     415             :   case AArch64::LD1Rv16b_POST:
     416             :     DestRegIdx = 1;
     417             :     BaseRegIdx = 2;
     418             :     OffsetIdx = 3;
     419             :     IsPrePost = true;
     420           2 :     break;
     421             : 
     422           2 :   case AArch64::LD1Twov1d_POST:
     423             :   case AArch64::LD1Twov2s_POST:
     424             :   case AArch64::LD1Twov4h_POST:
     425             :   case AArch64::LD1Twov8b_POST:
     426             :   case AArch64::LD1Twov2d_POST:
     427             :   case AArch64::LD1Twov4s_POST:
     428             :   case AArch64::LD1Twov8h_POST:
     429             :   case AArch64::LD1Twov16b_POST:
     430             :   case AArch64::LD1Threev1d_POST:
     431             :   case AArch64::LD1Threev2s_POST:
     432             :   case AArch64::LD1Threev4h_POST:
     433             :   case AArch64::LD1Threev8b_POST:
     434             :   case AArch64::LD1Threev2d_POST:
     435             :   case AArch64::LD1Threev4s_POST:
     436             :   case AArch64::LD1Threev8h_POST:
     437             :   case AArch64::LD1Threev16b_POST:
     438             :   case AArch64::LD1Fourv1d_POST:
     439             :   case AArch64::LD1Fourv2s_POST:
     440             :   case AArch64::LD1Fourv4h_POST:
     441             :   case AArch64::LD1Fourv8b_POST:
     442             :   case AArch64::LD1Fourv2d_POST:
     443             :   case AArch64::LD1Fourv4s_POST:
     444             :   case AArch64::LD1Fourv8h_POST:
     445             :   case AArch64::LD1Fourv16b_POST:
     446             :   case AArch64::LD2Twov2s_POST:
     447             :   case AArch64::LD2Twov4s_POST:
     448             :   case AArch64::LD2Twov8b_POST:
     449             :   case AArch64::LD2Twov2d_POST:
     450             :   case AArch64::LD2Twov4h_POST:
     451             :   case AArch64::LD2Twov8h_POST:
     452             :   case AArch64::LD2Twov16b_POST:
     453             :   case AArch64::LD2Rv1d_POST:
     454             :   case AArch64::LD2Rv2s_POST:
     455             :   case AArch64::LD2Rv4s_POST:
     456             :   case AArch64::LD2Rv8b_POST:
     457             :   case AArch64::LD2Rv2d_POST:
     458             :   case AArch64::LD2Rv4h_POST:
     459             :   case AArch64::LD2Rv8h_POST:
     460             :   case AArch64::LD2Rv16b_POST:
     461             :   case AArch64::LD3Threev2s_POST:
     462             :   case AArch64::LD3Threev4h_POST:
     463             :   case AArch64::LD3Threev8b_POST:
     464             :   case AArch64::LD3Threev2d_POST:
     465             :   case AArch64::LD3Threev4s_POST:
     466             :   case AArch64::LD3Threev8h_POST:
     467             :   case AArch64::LD3Threev16b_POST:
     468             :   case AArch64::LD3Rv1d_POST:
     469             :   case AArch64::LD3Rv2s_POST:
     470             :   case AArch64::LD3Rv4h_POST:
     471             :   case AArch64::LD3Rv8b_POST:
     472             :   case AArch64::LD3Rv2d_POST:
     473             :   case AArch64::LD3Rv4s_POST:
     474             :   case AArch64::LD3Rv8h_POST:
     475             :   case AArch64::LD3Rv16b_POST:
     476             :   case AArch64::LD4Fourv2s_POST:
     477             :   case AArch64::LD4Fourv4h_POST:
     478             :   case AArch64::LD4Fourv8b_POST:
     479             :   case AArch64::LD4Fourv2d_POST:
     480             :   case AArch64::LD4Fourv4s_POST:
     481             :   case AArch64::LD4Fourv8h_POST:
     482             :   case AArch64::LD4Fourv16b_POST:
     483             :   case AArch64::LD4Rv1d_POST:
     484             :   case AArch64::LD4Rv2s_POST:
     485             :   case AArch64::LD4Rv4h_POST:
     486             :   case AArch64::LD4Rv8b_POST:
     487             :   case AArch64::LD4Rv2d_POST:
     488             :   case AArch64::LD4Rv4s_POST:
     489             :   case AArch64::LD4Rv8h_POST:
     490             :   case AArch64::LD4Rv16b_POST:
     491             :     DestRegIdx = -1;
     492             :     BaseRegIdx = 2;
     493             :     OffsetIdx = 3;
     494             :     IsPrePost = true;
     495           2 :     break;
     496             : 
     497          25 :   case AArch64::LDRBBroW:
     498             :   case AArch64::LDRBBroX:
     499             :   case AArch64::LDRBBui:
     500             :   case AArch64::LDRBroW:
     501             :   case AArch64::LDRBroX:
     502             :   case AArch64::LDRBui:
     503             :   case AArch64::LDRDl:
     504             :   case AArch64::LDRDroW:
     505             :   case AArch64::LDRDroX:
     506             :   case AArch64::LDRDui:
     507             :   case AArch64::LDRHHroW:
     508             :   case AArch64::LDRHHroX:
     509             :   case AArch64::LDRHHui:
     510             :   case AArch64::LDRHroW:
     511             :   case AArch64::LDRHroX:
     512             :   case AArch64::LDRHui:
     513             :   case AArch64::LDRQl:
     514             :   case AArch64::LDRQroW:
     515             :   case AArch64::LDRQroX:
     516             :   case AArch64::LDRQui:
     517             :   case AArch64::LDRSBWroW:
     518             :   case AArch64::LDRSBWroX:
     519             :   case AArch64::LDRSBWui:
     520             :   case AArch64::LDRSBXroW:
     521             :   case AArch64::LDRSBXroX:
     522             :   case AArch64::LDRSBXui:
     523             :   case AArch64::LDRSHWroW:
     524             :   case AArch64::LDRSHWroX:
     525             :   case AArch64::LDRSHWui:
     526             :   case AArch64::LDRSHXroW:
     527             :   case AArch64::LDRSHXroX:
     528             :   case AArch64::LDRSHXui:
     529             :   case AArch64::LDRSWl:
     530             :   case AArch64::LDRSWroW:
     531             :   case AArch64::LDRSWroX:
     532             :   case AArch64::LDRSWui:
     533             :   case AArch64::LDRSl:
     534             :   case AArch64::LDRSroW:
     535             :   case AArch64::LDRSroX:
     536             :   case AArch64::LDRSui:
     537             :   case AArch64::LDRWl:
     538             :   case AArch64::LDRWroW:
     539             :   case AArch64::LDRWroX:
     540             :   case AArch64::LDRWui:
     541             :   case AArch64::LDRXl:
     542             :   case AArch64::LDRXroW:
     543             :   case AArch64::LDRXroX:
     544             :   case AArch64::LDRXui:
     545             :   case AArch64::LDURBBi:
     546             :   case AArch64::LDURBi:
     547             :   case AArch64::LDURDi:
     548             :   case AArch64::LDURHHi:
     549             :   case AArch64::LDURHi:
     550             :   case AArch64::LDURQi:
     551             :   case AArch64::LDURSBWi:
     552             :   case AArch64::LDURSBXi:
     553             :   case AArch64::LDURSHWi:
     554             :   case AArch64::LDURSHXi:
     555             :   case AArch64::LDURSWi:
     556             :   case AArch64::LDURSi:
     557             :   case AArch64::LDURWi:
     558             :   case AArch64::LDURXi:
     559             :     DestRegIdx = 0;
     560             :     BaseRegIdx = 1;
     561             :     OffsetIdx = 2;
     562             :     IsPrePost = false;
     563          25 :     break;
     564             : 
     565           2 :   case AArch64::LDRBBpost:
     566             :   case AArch64::LDRBBpre:
     567             :   case AArch64::LDRBpost:
     568             :   case AArch64::LDRBpre:
     569             :   case AArch64::LDRDpost:
     570             :   case AArch64::LDRDpre:
     571             :   case AArch64::LDRHHpost:
     572             :   case AArch64::LDRHHpre:
     573             :   case AArch64::LDRHpost:
     574             :   case AArch64::LDRHpre:
     575             :   case AArch64::LDRQpost:
     576             :   case AArch64::LDRQpre:
     577             :   case AArch64::LDRSBWpost:
     578             :   case AArch64::LDRSBWpre:
     579             :   case AArch64::LDRSBXpost:
     580             :   case AArch64::LDRSBXpre:
     581             :   case AArch64::LDRSHWpost:
     582             :   case AArch64::LDRSHWpre:
     583             :   case AArch64::LDRSHXpost:
     584             :   case AArch64::LDRSHXpre:
     585             :   case AArch64::LDRSWpost:
     586             :   case AArch64::LDRSWpre:
     587             :   case AArch64::LDRSpost:
     588             :   case AArch64::LDRSpre:
     589             :   case AArch64::LDRWpost:
     590             :   case AArch64::LDRWpre:
     591             :   case AArch64::LDRXpost:
     592             :   case AArch64::LDRXpre:
     593             :     DestRegIdx = 1;
     594             :     BaseRegIdx = 2;
     595             :     OffsetIdx = 3;
     596             :     IsPrePost = true;
     597           2 :     break;
     598             : 
     599           2 :   case AArch64::LDNPDi:
     600             :   case AArch64::LDNPQi:
     601             :   case AArch64::LDNPSi:
     602             :   case AArch64::LDPQi:
     603             :   case AArch64::LDPDi:
     604             :   case AArch64::LDPSi:
     605             :     DestRegIdx = -1;
     606             :     BaseRegIdx = 2;
     607             :     OffsetIdx = 3;
     608             :     IsPrePost = false;
     609           2 :     break;
     610             : 
     611          10 :   case AArch64::LDPSWi:
     612             :   case AArch64::LDPWi:
     613             :   case AArch64::LDPXi:
     614             :     DestRegIdx = 0;
     615             :     BaseRegIdx = 2;
     616             :     OffsetIdx = 3;
     617             :     IsPrePost = false;
     618          10 :     break;
     619             : 
     620           2 :   case AArch64::LDPQpost:
     621             :   case AArch64::LDPQpre:
     622             :   case AArch64::LDPDpost:
     623             :   case AArch64::LDPDpre:
     624             :   case AArch64::LDPSpost:
     625             :   case AArch64::LDPSpre:
     626             :     DestRegIdx = -1;
     627             :     BaseRegIdx = 3;
     628             :     OffsetIdx = 4;
     629             :     IsPrePost = true;
     630           2 :     break;
     631             : 
     632           2 :   case AArch64::LDPSWpost:
     633             :   case AArch64::LDPSWpre:
     634             :   case AArch64::LDPWpost:
     635             :   case AArch64::LDPWpre:
     636             :   case AArch64::LDPXpost:
     637             :   case AArch64::LDPXpre:
     638             :     DestRegIdx = 1;
     639             :     BaseRegIdx = 3;
     640             :     OffsetIdx = 4;
     641             :     IsPrePost = true;
     642           2 :     break;
     643             :   }
     644             : 
     645             :   // Loads from the stack pointer don't get prefetched.
     646         118 :   unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg();
     647          59 :   if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
     648             :     return None;
     649             : 
     650             :   LoadInfo LI;
     651         104 :   LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
     652             :   LI.BaseReg = BaseReg;
     653             :   LI.BaseRegIdx = BaseRegIdx;
     654          58 :   LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
     655          58 :   LI.IsPrePost = IsPrePost;
     656             :   return LI;
     657             : }
     658             : 
     659          76 : static Optional<unsigned> getTag(const TargetRegisterInfo *TRI,
     660             :                                  const MachineInstr &MI, const LoadInfo &LI) {
                         // Builds the tag for the load described by LI by passing three fields
                         // to makeTag(): the destination register encoding, the base register
                         // encoding, and an offset value.  Returns None when the offset operand
                         // is a global, symbol or constant-pool-index operand.
                         // NOTE: MI is not referenced in this function; only TRI and LI are used.
                         //
                         // A DestReg of 0 (set by getLoadInfo when there is no destination
                         // operand index) contributes 0 instead of a register encoding.
     661         134 :   unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
     662         152 :   unsigned Base = TRI->getEncodingValue(LI.BaseReg);
     663             :   unsigned Off;
                         // No offset operand at all: use 0.
     664          76 :   if (LI.OffsetOpnd == nullptr)
     665             :     Off = 0;
                         // Offsets that are not a plain register or immediate yield no tag.
     666          64 :   else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
     667             :            LI.OffsetOpnd->isCPI())
     668             :     return None;
                         // Register offset: the register encoding with bit 5 set.
                         // NOTE(review): bit 5 presumably distinguishes register offsets from
                         // immediate offsets in the tag -- confirm against makeTag().
     669          64 :   else if (LI.OffsetOpnd->isReg())
     670          34 :     Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
                         // Immediate offset: the two low bits of the immediate are dropped.
     671             :   else
     672          47 :     Off = LI.OffsetOpnd->getImm() >> 2;
     673             : 
     674             :   return makeTag(Dest, Base, Off);
     675             : }
     676             : 
     677          19 : void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
                         // Overview:
                         //   1. Rebuild TagMap (tag -> instructions with that tag) for every
                         //      instruction in the loop for which getLoadInfo() and getTag()
                         //      both produce a value.
                         //   2. Return early unless some tag is shared by more than one
                         //      instruction and at least one of them is a strided access.
                         //   3. For each strided access whose tag is shared, look for a free
                         //      GPR64 scratch register that gives an unused tag, copy the base
                         //      register into it and rewrite the load to use it.
                         // Sets Modified when an instruction is rewritten and updates the
                         // NumCollisionsAvoided / NumCollisionsNotAvoided counters.
     678             :   // Build the initial tag map for the whole loop.
     679          19 :   TagMap.clear();
     680          57 :   for (MachineBasicBlock *MBB : L.getBlocks())
     681         148 :     for (MachineInstr &MI : *MBB) {
     682         110 :       Optional<LoadInfo> LInfo = getLoadInfo(MI);
     683         110 :       if (!LInfo)
     684          73 :         continue;
     685          37 :       Optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
     686          37 :       if (!Tag)
     687             :         continue;
     688          37 :       TagMap[*Tag].push_back(&MI);
     689             :     }
     690             : 
                         // A shared tag only needs fixing when at least one of the instructions
                         // sharing it is a strided access.  Stop scanning at the first such tag.
     691             :   bool AnyCollisions = false;
     692          39 :   for (auto &P : TagMap) {
     693             :     auto Size = P.second.size();
     694          18 :     if (Size > 1) {
     695          17 :       for (auto *MI : P.second) {
     696          17 :         if (TII->isStridedAccess(*MI)) {
     697             :           AnyCollisions = true;
     698             :           break;
     699             :         }
     700             :       }
     701             :     }
     702          18 :     if (AnyCollisions)
     703             :       break;
     704             :   }
     705             :   // Nothing to fix.
     706          19 :   if (!AnyCollisions)
     707           2 :     return;
     708             : 
     709          17 :   MachineRegisterInfo &MRI = Fn.getRegInfo();
     710             : 
     711             :   // Go through all the basic blocks in the current loop and fix any streaming
     712             :   // loads to avoid collisions with any other loads.
     713          17 :   LiveRegUnits LR(*TRI);
     714          51 :   for (MachineBasicBlock *MBB : L.getBlocks()) {
                           // LR is seeded with the block's live-outs and stepped backward past
                           // each instruction after that instruction has been examined.  The
                           // step lives in the loop increment, so it also runs on `continue`.
     715             :     LR.clear();
     716          17 :     LR.addLiveOuts(*MBB);
     717         266 :     for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
     718             :       MachineInstr &MI = *I;
     719         116 :       if (!TII->isStridedAccess(MI))
     720         192 :         continue;
     721             : 
     722          22 :       Optional<LoadInfo> OptLdI = getLoadInfo(MI);
     723          22 :       if (!OptLdI)
     724           1 :         continue;
     725          21 :       LoadInfo LdI = *OptLdI;
     726          21 :       Optional<unsigned> OptOldTag = getTag(TRI, MI, LdI);
     727          21 :       if (!OptOldTag)
     728           0 :         continue;
                             // Only instructions whose tag is still shared need a new base.
                             // Earlier fixes in this walk may already have removed the other
                             // users of this tag from the list.
     729             :       auto &OldCollisions = TagMap[*OptOldTag];
     730          21 :       if (OldCollisions.size() <= 1)
     731           3 :         continue;
     732             : 
     733             :       bool Fixed = false;
     734             :       LLVM_DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
     735             : 
     736             :       if (!DebugCounter::shouldExecute(FixCounter)) {
     737             :         LLVM_DEBUG(dbgs() << "Skipping fix due to debug counter:\n  " << MI);
     738             :         continue;
     739             :       }
     740             : 
     741             :       // Add the non-base registers of MI as live so we don't use them as
     742             :       // scratch registers.
     743          91 :       for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
     744          73 :         if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
     745          18 :           continue;
     746          55 :         MachineOperand &MO = MI.getOperand(OpI);
     747          55 :         if (MO.isReg() && MO.readsReg())
     748           9 :           LR.addReg(MO.getReg());
     749             :       }
     750             : 
                             // Try each 64-bit GPR in register-class order and take the first
                             // one that is available in LR, not reserved, and yields a tag that
                             // is not already a key in TagMap.
     751          86 :       for (unsigned ScratchReg : AArch64::GPR64RegClass) {
     752          86 :         if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
     753          50 :           continue;
     754             : 
     755             :         LoadInfo NewLdI(LdI);
     756             :         NewLdI.BaseReg = ScratchReg;
                               // The unchecked dereference relies on getTag() returning None
                               // only because of the offset operand.  NewLdI has the same
                               // offset operand as LdI, whose tag was computed above.
     757          18 :         unsigned NewTag = *getTag(TRI, MI, NewLdI);
     758             :         // Scratch reg tag would collide too, so don't use it.
     759           0 :         if (TagMap.count(NewTag))
     760           0 :           continue;
     761             : 
     762             :         LLVM_DEBUG(dbgs() << "Changing base reg to: "
     763             :                           << printReg(ScratchReg, TRI) << '\n');
     764             : 
     765             :         // Rewrite:
     766             :         //   Xd = LOAD Xb, off
     767             :         // to:
     768             :         //   Xc = MOV Xb
     769             :         //   Xd = LOAD Xc, off
                               // The MOV is emitted as ORRXrs ScratchReg, XZR, BaseReg with an
                               // immediate of 0, inserted immediately before MI.
     770             :         DebugLoc DL = MI.getDebugLoc();
     771          54 :         BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
     772          18 :             .addReg(AArch64::XZR)
     773          18 :             .addReg(LdI.BaseReg)
     774             :             .addImm(0);
     775          18 :         MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
     776          18 :         BaseOpnd.setReg(ScratchReg);
     777             : 
     778             :         // If the load does a pre/post increment, then insert a MOV after as
     779             :         // well to update the real base register.
     780          18 :         if (LdI.IsPrePost) {
     781             :           LLVM_DEBUG(dbgs() << "Doing post MOV of incremented reg: "
     782             :                             << printReg(ScratchReg, TRI) << '\n');
     783           7 :           MI.getOperand(0).setReg(
     784             :               ScratchReg); // Change tied operand pre/post update dest.
     785          14 :           BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
     786           7 :                   TII->get(AArch64::ORRXrs), LdI.BaseReg)
     787           7 :               .addReg(AArch64::XZR)
     788           7 :               .addReg(ScratchReg)
     789             :               .addImm(0);
     790             :         }
     791             : 
                               // Remove MI from the old tag's list by swapping it with the last
                               // element and popping.  Order within the list is not preserved.
     792          21 :         for (int I = 0, E = OldCollisions.size(); I != E; ++I)
     793          42 :           if (OldCollisions[I] == &MI) {
     794          18 :             std::swap(OldCollisions[I], OldCollisions[E - 1]);
     795             :             OldCollisions.pop_back();
     796             :             break;
     797             :           }
     798             : 
     799             :         // Update TagMap to reflect instruction changes to reduce the number
     800             :         // of later MOVs to be inserted.  This needs to be done after
     801             :         // OldCollisions is updated since it may be relocated by this
     802             :         // insertion.
     803          18 :         TagMap[NewTag].push_back(&MI);
     804             :         ++NumCollisionsAvoided;
     805             :         Fixed = true;
     806          18 :         Modified = true;
     807             :         break;
     808             :       }
                             // No usable scratch register was found; MI is left unchanged.
     809             :       if (!Fixed)
     810             :         ++NumCollisionsNotAvoided;
     811             :     }
     812             :   }
     813             : }
     814             : 
     815       13306 : bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
                         // Pass entry point.  Does nothing and returns false unless the
                         // subtarget's processor family is Falkor and skipFunction() does not
                         // reject the function.  Otherwise calls runOnLoop() on the selected
                         // loops and returns the Modified flag, which runOnLoop() sets when it
                         // rewrites an instruction.
     816       13306 :   auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
     817       13306 :   if (ST.getProcFamily() != AArch64Subtarget::Falkor)
     818             :     return false;
     819             : 
     820          66 :   if (skipFunction(Fn.getFunction()))
     821             :     return false;
     822             : 
                         // Cache the target hooks in members; runOnLoop() reads TII and TRI.
     823          66 :   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
     824          66 :   TRI = ST.getRegisterInfo();
     825             : 
     826             :   assert(TRI->trackLivenessAfterRegAlloc(Fn) &&
     827             :          "Register liveness not available!");
     828             : 
     829          66 :   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
     830             : 
     831          66 :   Modified = false;
     832             : 
                         // Visit every loop in LI depth-first together with its nested loops.
                         // Only loops for which empty() is true are passed to runOnLoop().
     833          85 :   for (MachineLoop *I : LI)
     834          57 :     for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
     835             :       // Only process inner-loops
     836          19 :       if (L->empty())
     837          19 :         runOnLoop(**L, Fn);
     838             : 
     839          66 :   return Modified;
     840             : }
     841             : 
     842      203368 : FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); } // Factory: returns a newly allocated FalkorHWPFFix; presumably the caller takes ownership -- confirm.

Generated by: LCOV version 1.13