LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FalkorHWPFFix.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 179 241 74.3 %
Date: 2017-09-14 15:23:50 Functions: 20 21 95.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : /// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
      10             : /// that may inhibit the HW prefetching.  This is done in two steps.  Before
      11             : /// ISel, we mark strided loads (i.e. those that will likely benefit from
      12             : /// prefetching) with metadata.  Then, after opcodes have been finalized, we
      13             : /// insert MOVs and re-write loads to prevent unintentional tag collisions.
      14             : // ===---------------------------------------------------------------------===//
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64Subtarget.h"
      19             : #include "AArch64TargetMachine.h"
      20             : #include "llvm/ADT/DenseMap.h"
      21             : #include "llvm/ADT/DepthFirstIterator.h"
      22             : #include "llvm/ADT/None.h"
      23             : #include "llvm/ADT/Optional.h"
      24             : #include "llvm/ADT/SmallVector.h"
      25             : #include "llvm/ADT/Statistic.h"
      26             : #include "llvm/Analysis/LoopInfo.h"
      27             : #include "llvm/Analysis/ScalarEvolution.h"
      28             : #include "llvm/Analysis/ScalarEvolutionExpressions.h"
      29             : #include "llvm/CodeGen/LiveRegUnits.h"
      30             : #include "llvm/CodeGen/MachineBasicBlock.h"
      31             : #include "llvm/CodeGen/MachineFunction.h"
      32             : #include "llvm/CodeGen/MachineFunctionPass.h"
      33             : #include "llvm/CodeGen/MachineInstr.h"
      34             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      35             : #include "llvm/CodeGen/MachineLoopInfo.h"
      36             : #include "llvm/CodeGen/MachineOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetPassConfig.h"
      39             : #include "llvm/IR/DebugLoc.h"
      40             : #include "llvm/IR/Dominators.h"
      41             : #include "llvm/IR/Function.h"
      42             : #include "llvm/IR/Instruction.h"
      43             : #include "llvm/IR/Instructions.h"
      44             : #include "llvm/IR/Metadata.h"
      45             : #include "llvm/Pass.h"
      46             : #include "llvm/Support/Casting.h"
      47             : #include "llvm/Support/Debug.h"
      48             : #include "llvm/Support/raw_ostream.h"
      49             : #include "llvm/Target/TargetRegisterInfo.h"
      50             : #include <cassert>
      51             : #include <iterator>
      52             : #include <utility>
      53             : 
      54             : using namespace llvm;
      55             : 
      56             : #define DEBUG_TYPE "falkor-hwpf-fix"
      57             : 
      58             : STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
      59             : STATISTIC(NumCollisionsAvoided,
      60             :           "Number of HW prefetch tag collisions avoided");
      61             : STATISTIC(NumCollisionsNotAvoided,
      62             :           "Number of HW prefetch tag collisions not avoided due to lack of regsiters");
      63             : 
      64             : namespace {
      65             : 
      66             : class FalkorMarkStridedAccesses {
      67             : public:
      68             :   FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
      69          51 :       : LI(LI), SE(SE) {}
      70             : 
      71             :   bool run();
      72             : 
      73             : private:
      74             :   bool runOnLoop(Loop &L);
      75             : 
      76             :   LoopInfo &LI;
      77             :   ScalarEvolution &SE;
      78             : };
      79             : 
      80        1818 : class FalkorMarkStridedAccessesLegacy : public FunctionPass {
      81             : public:
      82             :   static char ID; // Pass ID, replacement for typeid
      83             : 
      84        1832 :   FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
      85         916 :     initializeFalkorMarkStridedAccessesLegacyPass(
      86         916 :         *PassRegistry::getPassRegistry());
      87             :   }
      88             : 
      89         913 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      90         913 :     AU.addRequired<TargetPassConfig>();
      91         913 :     AU.addPreserved<DominatorTreeWrapperPass>();
      92         913 :     AU.addRequired<LoopInfoWrapperPass>();
      93         913 :     AU.addPreserved<LoopInfoWrapperPass>();
      94         913 :     AU.addRequired<ScalarEvolutionWrapperPass>();
      95         913 :     AU.addPreserved<ScalarEvolutionWrapperPass>();
      96         913 :   }
      97             : 
      98             :   bool runOnFunction(Function &F) override;
      99             : };
     100             : 
     101             : } // end anonymous namespace
     102             : 
     103             : char FalkorMarkStridedAccessesLegacy::ID = 0;
     104             : 
     105       53045 : INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
     106             :                       "Falkor HW Prefetch Fix", false, false)
     107       53045 : INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
     108       53045 : INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
     109       53045 : INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
     110      315301 : INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
     111             :                     "Falkor HW Prefetch Fix", false, false)
     112             : 
     113         914 : FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
     114        1828 :   return new FalkorMarkStridedAccessesLegacy();
     115             : }
     116             : 
     117       11023 : bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
     118       11023 :   TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
     119             :   const AArch64Subtarget *ST =
     120       11023 :       TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
     121       11023 :   if (ST->getProcFamily() != AArch64Subtarget::Falkor)
     122             :     return false;
     123             : 
     124          51 :   if (skipFunction(F))
     125             :     return false;
     126             : 
     127         102 :   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     128         102 :   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     129             : 
     130          51 :   FalkorMarkStridedAccesses LDP(LI, SE);
     131          51 :   return LDP.run();
     132             : }
     133             : 
     134          51 : bool FalkorMarkStridedAccesses::run() {
     135          51 :   bool MadeChange = false;
     136             : 
     137         209 :   for (Loop *L : LI)
     138          26 :     for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
     139           6 :       MadeChange |= runOnLoop(**LIt);
     140             : 
     141          51 :   return MadeChange;
     142             : }
     143             : 
     144           6 : bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
     145             :   // Only mark strided loads in the inner-most loop
     146          12 :   if (!L.empty())
     147             :     return false;
     148             : 
     149           5 :   bool MadeChange = false;
     150             : 
     151          20 :   for (BasicBlock *BB : L.blocks()) {
     152          80 :     for (Instruction &I : *BB) {
     153          13 :       LoadInst *LoadI = dyn_cast<LoadInst>(&I);
     154          52 :       if (!LoadI)
     155             :         continue;
     156             : 
     157          13 :       Value *PtrValue = LoadI->getPointerOperand();
     158          13 :       if (L.isLoopInvariant(PtrValue))
     159             :         continue;
     160             : 
     161          13 :       const SCEV *LSCEV = SE.getSCEV(PtrValue);
     162          12 :       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
     163          13 :       if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
     164             :         continue;
     165             : 
     166          24 :       LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
     167          36 :                          MDNode::get(LoadI->getContext(), {}));
     168          12 :       ++NumStridedLoadsMarked;
     169             :       DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
     170          12 :       MadeChange = true;
     171             :     }
     172             :   }
     173             : 
     174             :   return MadeChange;
     175             : }
     176             : 
     177             : namespace {
     178             : 
     179        2724 : class FalkorHWPFFix : public MachineFunctionPass {
     180             : public:
     181             :   static char ID;
     182             : 
     183        1830 :   FalkorHWPFFix() : MachineFunctionPass(ID) {
     184         915 :     initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
     185         915 :   }
     186             : 
     187             :   bool runOnMachineFunction(MachineFunction &Fn) override;
     188             : 
     189         905 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     190         905 :     AU.addRequired<MachineLoopInfo>();
     191         905 :     MachineFunctionPass::getAnalysisUsage(AU);
     192         905 :   }
     193             : 
     194         905 :   MachineFunctionProperties getRequiredProperties() const override {
     195        2715 :     return MachineFunctionProperties().set(
     196        2715 :         MachineFunctionProperties::Property::NoVRegs);
     197             :   }
     198             : 
     199             : private:
     200             :   void runOnLoop(MachineLoop &L, MachineFunction &Fn);
     201             : 
     202             :   const AArch64InstrInfo *TII;
     203             :   const TargetRegisterInfo *TRI;
     204             :   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
     205             :   bool Modified;
     206             : };
     207             : 
     208             : /// Bits from load opcodes used to compute HW prefetcher instruction tags.
     209             : struct LoadInfo {
     210             :   LoadInfo() = default;
     211             : 
     212             :   unsigned DestReg = 0;
     213             :   unsigned BaseReg = 0;
     214             :   int BaseRegIdx = -1;
     215             :   const MachineOperand *OffsetOpnd = nullptr;
     216             :   bool IsPrePost = false;
     217             : };
     218             : 
     219             : } // end anonymous namespace
     220             : 
     221             : char FalkorHWPFFix::ID = 0;
     222             : 
     223       53045 : INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late",
     224             :                       "Falkor HW Prefetch Fix Late Phase", false, false)
     225       53045 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     226      315298 : INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late",
     227             :                     "Falkor HW Prefetch Fix Late Phase", false, false)
     228             : 
     229             : static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
     230          18 :   return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
     231             : }
     232             : 
     233          39 : static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
     234             :   int DestRegIdx;
     235             :   int BaseRegIdx;
     236             :   int OffsetIdx;
     237             :   bool IsPrePost;
     238             : 
     239          78 :   switch (MI.getOpcode()) {
     240             :   default:
     241          25 :     return None;
     242             : 
     243             :   case AArch64::LD1i8:
     244             :   case AArch64::LD1i16:
     245             :   case AArch64::LD1i32:
     246             :   case AArch64::LD1i64:
     247             :   case AArch64::LD2i8:
     248             :   case AArch64::LD2i16:
     249             :   case AArch64::LD2i32:
     250             :   case AArch64::LD2i64:
     251             :   case AArch64::LD3i8:
     252             :   case AArch64::LD3i16:
     253             :   case AArch64::LD3i32:
     254             :   case AArch64::LD4i8:
     255             :   case AArch64::LD4i16:
     256             :   case AArch64::LD4i32:
     257             :     DestRegIdx = 0;
     258             :     BaseRegIdx = 3;
     259             :     OffsetIdx = -1;
     260             :     IsPrePost = false;
     261             :     break;
     262             : 
     263           0 :   case AArch64::LD3i64:
     264             :   case AArch64::LD4i64:
     265           0 :     DestRegIdx = -1;
     266           0 :     BaseRegIdx = 3;
     267           0 :     OffsetIdx = -1;
     268           0 :     IsPrePost = false;
     269           0 :     break;
     270             : 
     271           0 :   case AArch64::LD1Onev1d:
     272             :   case AArch64::LD1Onev2s:
     273             :   case AArch64::LD1Onev4h:
     274             :   case AArch64::LD1Onev8b:
     275             :   case AArch64::LD1Onev2d:
     276             :   case AArch64::LD1Onev4s:
     277             :   case AArch64::LD1Onev8h:
     278             :   case AArch64::LD1Onev16b:
     279             :   case AArch64::LD1Rv1d:
     280             :   case AArch64::LD1Rv2s:
     281             :   case AArch64::LD1Rv4h:
     282             :   case AArch64::LD1Rv8b:
     283             :   case AArch64::LD1Rv2d:
     284             :   case AArch64::LD1Rv4s:
     285             :   case AArch64::LD1Rv8h:
     286             :   case AArch64::LD1Rv16b:
     287             :   case AArch64::LD1Twov1d:
     288             :   case AArch64::LD1Twov2s:
     289             :   case AArch64::LD1Twov4h:
     290             :   case AArch64::LD1Twov8b:
     291             :   case AArch64::LD2Twov2s:
     292             :   case AArch64::LD2Twov4s:
     293             :   case AArch64::LD2Twov8b:
     294             :   case AArch64::LD2Rv1d:
     295             :   case AArch64::LD2Rv2s:
     296             :   case AArch64::LD2Rv4s:
     297             :   case AArch64::LD2Rv8b:
     298           0 :     DestRegIdx = 0;
     299           0 :     BaseRegIdx = 1;
     300           0 :     OffsetIdx = -1;
     301           0 :     IsPrePost = false;
     302           0 :     break;
     303             : 
     304           0 :   case AArch64::LD1Twov2d:
     305             :   case AArch64::LD1Twov4s:
     306             :   case AArch64::LD1Twov8h:
     307             :   case AArch64::LD1Twov16b:
     308             :   case AArch64::LD1Threev1d:
     309             :   case AArch64::LD1Threev2s:
     310             :   case AArch64::LD1Threev4h:
     311             :   case AArch64::LD1Threev8b:
     312             :   case AArch64::LD1Threev2d:
     313             :   case AArch64::LD1Threev4s:
     314             :   case AArch64::LD1Threev8h:
     315             :   case AArch64::LD1Threev16b:
     316             :   case AArch64::LD1Fourv1d:
     317             :   case AArch64::LD1Fourv2s:
     318             :   case AArch64::LD1Fourv4h:
     319             :   case AArch64::LD1Fourv8b:
     320             :   case AArch64::LD1Fourv2d:
     321             :   case AArch64::LD1Fourv4s:
     322             :   case AArch64::LD1Fourv8h:
     323             :   case AArch64::LD1Fourv16b:
     324             :   case AArch64::LD2Twov2d:
     325             :   case AArch64::LD2Twov4h:
     326             :   case AArch64::LD2Twov8h:
     327             :   case AArch64::LD2Twov16b:
     328             :   case AArch64::LD2Rv2d:
     329             :   case AArch64::LD2Rv4h:
     330             :   case AArch64::LD2Rv8h:
     331             :   case AArch64::LD2Rv16b:
     332             :   case AArch64::LD3Threev2s:
     333             :   case AArch64::LD3Threev4h:
     334             :   case AArch64::LD3Threev8b:
     335             :   case AArch64::LD3Threev2d:
     336             :   case AArch64::LD3Threev4s:
     337             :   case AArch64::LD3Threev8h:
     338             :   case AArch64::LD3Threev16b:
     339             :   case AArch64::LD3Rv1d:
     340             :   case AArch64::LD3Rv2s:
     341             :   case AArch64::LD3Rv4h:
     342             :   case AArch64::LD3Rv8b:
     343             :   case AArch64::LD3Rv2d:
     344             :   case AArch64::LD3Rv4s:
     345             :   case AArch64::LD3Rv8h:
     346             :   case AArch64::LD3Rv16b:
     347             :   case AArch64::LD4Fourv2s:
     348             :   case AArch64::LD4Fourv4h:
     349             :   case AArch64::LD4Fourv8b:
     350             :   case AArch64::LD4Fourv2d:
     351             :   case AArch64::LD4Fourv4s:
     352             :   case AArch64::LD4Fourv8h:
     353             :   case AArch64::LD4Fourv16b:
     354             :   case AArch64::LD4Rv1d:
     355             :   case AArch64::LD4Rv2s:
     356             :   case AArch64::LD4Rv4h:
     357             :   case AArch64::LD4Rv8b:
     358             :   case AArch64::LD4Rv2d:
     359             :   case AArch64::LD4Rv4s:
     360             :   case AArch64::LD4Rv8h:
     361             :   case AArch64::LD4Rv16b:
     362           0 :     DestRegIdx = -1;
     363           0 :     BaseRegIdx = 1;
     364           0 :     OffsetIdx = -1;
     365           0 :     IsPrePost = false;
     366           0 :     break;
     367             : 
     368           0 :   case AArch64::LD1i8_POST:
     369             :   case AArch64::LD1i16_POST:
     370             :   case AArch64::LD1i32_POST:
     371             :   case AArch64::LD1i64_POST:
     372             :   case AArch64::LD2i8_POST:
     373             :   case AArch64::LD2i16_POST:
     374             :   case AArch64::LD2i32_POST:
     375             :   case AArch64::LD2i64_POST:
     376             :   case AArch64::LD3i8_POST:
     377             :   case AArch64::LD3i16_POST:
     378             :   case AArch64::LD3i32_POST:
     379             :   case AArch64::LD4i8_POST:
     380             :   case AArch64::LD4i16_POST:
     381             :   case AArch64::LD4i32_POST:
     382           0 :     DestRegIdx = 1;
     383           0 :     BaseRegIdx = 4;
     384           0 :     OffsetIdx = 5;
     385           0 :     IsPrePost = false;
     386           0 :     break;
     387             : 
     388           0 :   case AArch64::LD3i64_POST:
     389             :   case AArch64::LD4i64_POST:
     390           0 :     DestRegIdx = -1;
     391           0 :     BaseRegIdx = 4;
     392           0 :     OffsetIdx = 5;
     393           0 :     IsPrePost = false;
     394           0 :     break;
     395             : 
     396           0 :   case AArch64::LD1Onev1d_POST:
     397             :   case AArch64::LD1Onev2s_POST:
     398             :   case AArch64::LD1Onev4h_POST:
     399             :   case AArch64::LD1Onev8b_POST:
     400             :   case AArch64::LD1Onev2d_POST:
     401             :   case AArch64::LD1Onev4s_POST:
     402             :   case AArch64::LD1Onev8h_POST:
     403             :   case AArch64::LD1Onev16b_POST:
     404             :   case AArch64::LD1Rv1d_POST:
     405             :   case AArch64::LD1Rv2s_POST:
     406             :   case AArch64::LD1Rv4h_POST:
     407             :   case AArch64::LD1Rv8b_POST:
     408             :   case AArch64::LD1Rv2d_POST:
     409             :   case AArch64::LD1Rv4s_POST:
     410             :   case AArch64::LD1Rv8h_POST:
     411             :   case AArch64::LD1Rv16b_POST:
     412             :   case AArch64::LD1Twov1d_POST:
     413             :   case AArch64::LD1Twov2s_POST:
     414             :   case AArch64::LD1Twov4h_POST:
     415             :   case AArch64::LD1Twov8b_POST:
     416             :   case AArch64::LD2Twov2s_POST:
     417             :   case AArch64::LD2Twov4s_POST:
     418             :   case AArch64::LD2Twov8b_POST:
     419             :   case AArch64::LD2Rv1d_POST:
     420             :   case AArch64::LD2Rv2s_POST:
     421             :   case AArch64::LD2Rv4s_POST:
     422             :   case AArch64::LD2Rv8b_POST:
     423           0 :     DestRegIdx = 1;
     424           0 :     BaseRegIdx = 2;
     425           0 :     OffsetIdx = 3;
     426           0 :     IsPrePost = false;
     427           0 :     break;
     428             : 
     429           0 :   case AArch64::LD1Twov2d_POST:
     430             :   case AArch64::LD1Twov4s_POST:
     431             :   case AArch64::LD1Twov8h_POST:
     432             :   case AArch64::LD1Twov16b_POST:
     433             :   case AArch64::LD1Threev1d_POST:
     434             :   case AArch64::LD1Threev2s_POST:
     435             :   case AArch64::LD1Threev4h_POST:
     436             :   case AArch64::LD1Threev8b_POST:
     437             :   case AArch64::LD1Threev2d_POST:
     438             :   case AArch64::LD1Threev4s_POST:
     439             :   case AArch64::LD1Threev8h_POST:
     440             :   case AArch64::LD1Threev16b_POST:
     441             :   case AArch64::LD1Fourv1d_POST:
     442             :   case AArch64::LD1Fourv2s_POST:
     443             :   case AArch64::LD1Fourv4h_POST:
     444             :   case AArch64::LD1Fourv8b_POST:
     445             :   case AArch64::LD1Fourv2d_POST:
     446             :   case AArch64::LD1Fourv4s_POST:
     447             :   case AArch64::LD1Fourv8h_POST:
     448             :   case AArch64::LD1Fourv16b_POST:
     449             :   case AArch64::LD2Twov2d_POST:
     450             :   case AArch64::LD2Twov4h_POST:
     451             :   case AArch64::LD2Twov8h_POST:
     452             :   case AArch64::LD2Twov16b_POST:
     453             :   case AArch64::LD2Rv2d_POST:
     454             :   case AArch64::LD2Rv4h_POST:
     455             :   case AArch64::LD2Rv8h_POST:
     456             :   case AArch64::LD2Rv16b_POST:
     457             :   case AArch64::LD3Threev2s_POST:
     458             :   case AArch64::LD3Threev4h_POST:
     459             :   case AArch64::LD3Threev8b_POST:
     460             :   case AArch64::LD3Threev2d_POST:
     461             :   case AArch64::LD3Threev4s_POST:
     462             :   case AArch64::LD3Threev8h_POST:
     463             :   case AArch64::LD3Threev16b_POST:
     464             :   case AArch64::LD3Rv1d_POST:
     465             :   case AArch64::LD3Rv2s_POST:
     466             :   case AArch64::LD3Rv4h_POST:
     467             :   case AArch64::LD3Rv8b_POST:
     468             :   case AArch64::LD3Rv2d_POST:
     469             :   case AArch64::LD3Rv4s_POST:
     470             :   case AArch64::LD3Rv8h_POST:
     471             :   case AArch64::LD3Rv16b_POST:
     472             :   case AArch64::LD4Fourv2s_POST:
     473             :   case AArch64::LD4Fourv4h_POST:
     474             :   case AArch64::LD4Fourv8b_POST:
     475             :   case AArch64::LD4Fourv2d_POST:
     476             :   case AArch64::LD4Fourv4s_POST:
     477             :   case AArch64::LD4Fourv8h_POST:
     478             :   case AArch64::LD4Fourv16b_POST:
     479             :   case AArch64::LD4Rv1d_POST:
     480             :   case AArch64::LD4Rv2s_POST:
     481             :   case AArch64::LD4Rv4h_POST:
     482             :   case AArch64::LD4Rv8b_POST:
     483             :   case AArch64::LD4Rv2d_POST:
     484             :   case AArch64::LD4Rv4s_POST:
     485             :   case AArch64::LD4Rv8h_POST:
     486             :   case AArch64::LD4Rv16b_POST:
     487           0 :     DestRegIdx = -1;
     488           0 :     BaseRegIdx = 2;
     489           0 :     OffsetIdx = 3;
     490           0 :     IsPrePost = false;
     491           0 :     break;
     492             : 
     493           4 :   case AArch64::LDRBBroW:
     494             :   case AArch64::LDRBBroX:
     495             :   case AArch64::LDRBBui:
     496             :   case AArch64::LDRBroW:
     497             :   case AArch64::LDRBroX:
     498             :   case AArch64::LDRBui:
     499             :   case AArch64::LDRDl:
     500             :   case AArch64::LDRDroW:
     501             :   case AArch64::LDRDroX:
     502             :   case AArch64::LDRDui:
     503             :   case AArch64::LDRHHroW:
     504             :   case AArch64::LDRHHroX:
     505             :   case AArch64::LDRHHui:
     506             :   case AArch64::LDRHroW:
     507             :   case AArch64::LDRHroX:
     508             :   case AArch64::LDRHui:
     509             :   case AArch64::LDRQl:
     510             :   case AArch64::LDRQroW:
     511             :   case AArch64::LDRQroX:
     512             :   case AArch64::LDRQui:
     513             :   case AArch64::LDRSBWroW:
     514             :   case AArch64::LDRSBWroX:
     515             :   case AArch64::LDRSBWui:
     516             :   case AArch64::LDRSBXroW:
     517             :   case AArch64::LDRSBXroX:
     518             :   case AArch64::LDRSBXui:
     519             :   case AArch64::LDRSHWroW:
     520             :   case AArch64::LDRSHWroX:
     521             :   case AArch64::LDRSHWui:
     522             :   case AArch64::LDRSHXroW:
     523             :   case AArch64::LDRSHXroX:
     524             :   case AArch64::LDRSHXui:
     525             :   case AArch64::LDRSWl:
     526             :   case AArch64::LDRSWroW:
     527             :   case AArch64::LDRSWroX:
     528             :   case AArch64::LDRSWui:
     529             :   case AArch64::LDRSl:
     530             :   case AArch64::LDRSroW:
     531             :   case AArch64::LDRSroX:
     532             :   case AArch64::LDRSui:
     533             :   case AArch64::LDRWl:
     534             :   case AArch64::LDRWroW:
     535             :   case AArch64::LDRWroX:
     536             :   case AArch64::LDRWui:
     537             :   case AArch64::LDRXl:
     538             :   case AArch64::LDRXroW:
     539             :   case AArch64::LDRXroX:
     540             :   case AArch64::LDRXui:
     541             :   case AArch64::LDURBBi:
     542             :   case AArch64::LDURBi:
     543             :   case AArch64::LDURDi:
     544             :   case AArch64::LDURHHi:
     545             :   case AArch64::LDURHi:
     546             :   case AArch64::LDURQi:
     547             :   case AArch64::LDURSBWi:
     548             :   case AArch64::LDURSBXi:
     549             :   case AArch64::LDURSHWi:
     550             :   case AArch64::LDURSHXi:
     551             :   case AArch64::LDURSWi:
     552             :   case AArch64::LDURSi:
     553             :   case AArch64::LDURWi:
     554             :   case AArch64::LDURXi:
     555           4 :     DestRegIdx = 0;
     556           4 :     BaseRegIdx = 1;
     557           4 :     OffsetIdx = 2;
     558           4 :     IsPrePost = false;
     559           4 :     break;
     560             : 
     561           2 :   case AArch64::LDRBBpost:
     562             :   case AArch64::LDRBBpre:
     563             :   case AArch64::LDRBpost:
     564             :   case AArch64::LDRBpre:
     565             :   case AArch64::LDRDpost:
     566             :   case AArch64::LDRDpre:
     567             :   case AArch64::LDRHHpost:
     568             :   case AArch64::LDRHHpre:
     569             :   case AArch64::LDRHpost:
     570             :   case AArch64::LDRHpre:
     571             :   case AArch64::LDRQpost:
     572             :   case AArch64::LDRQpre:
     573             :   case AArch64::LDRSBWpost:
     574             :   case AArch64::LDRSBWpre:
     575             :   case AArch64::LDRSBXpost:
     576             :   case AArch64::LDRSBXpre:
     577             :   case AArch64::LDRSHWpost:
     578             :   case AArch64::LDRSHWpre:
     579             :   case AArch64::LDRSHXpost:
     580             :   case AArch64::LDRSHXpre:
     581             :   case AArch64::LDRSWpost:
     582             :   case AArch64::LDRSWpre:
     583             :   case AArch64::LDRSpost:
     584             :   case AArch64::LDRSpre:
     585             :   case AArch64::LDRWpost:
     586             :   case AArch64::LDRWpre:
     587             :   case AArch64::LDRXpost:
     588             :   case AArch64::LDRXpre:
     589           2 :     DestRegIdx = 1;
     590           2 :     BaseRegIdx = 2;
     591           2 :     OffsetIdx = 3;
     592           2 :     IsPrePost = true;
     593           2 :     break;
     594             : 
     595           0 :   case AArch64::LDPQi:
     596           0 :     DestRegIdx = -1;
     597           0 :     BaseRegIdx = 2;
     598           0 :     OffsetIdx = 3;
     599           0 :     IsPrePost = false;
     600           0 :     break;
     601             : 
     602           8 :   case AArch64::LDPDi:
     603             :   case AArch64::LDPSWi:
     604             :   case AArch64::LDPSi:
     605             :   case AArch64::LDPWi:
     606             :   case AArch64::LDPXi:
     607           8 :     DestRegIdx = 0;
     608           8 :     BaseRegIdx = 2;
     609           8 :     OffsetIdx = 3;
     610           8 :     IsPrePost = false;
     611           8 :     break;
     612             : 
     613           0 :   case AArch64::LDPQpost:
     614             :   case AArch64::LDPQpre:
     615           0 :     DestRegIdx = -1;
     616           0 :     BaseRegIdx = 3;
     617           0 :     OffsetIdx = 4;
     618           0 :     IsPrePost = true;
     619           0 :     break;
     620             : 
     621           0 :   case AArch64::LDPDpost:
     622             :   case AArch64::LDPDpre:
     623             :   case AArch64::LDPSWpost:
     624             :   case AArch64::LDPSWpre:
     625             :   case AArch64::LDPSpost:
     626             :   case AArch64::LDPSpre:
     627             :   case AArch64::LDPWpost:
     628             :   case AArch64::LDPWpre:
     629             :   case AArch64::LDPXpost:
     630             :   case AArch64::LDPXpre:
     631           0 :     DestRegIdx = 1;
     632           0 :     BaseRegIdx = 3;
     633           0 :     OffsetIdx = 4;
     634           0 :     IsPrePost = true;
     635           0 :     break;
     636             :   }
     637             : 
     638          14 :   LoadInfo LI;
     639          28 :   LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
     640          28 :   LI.BaseReg = MI.getOperand(BaseRegIdx).getReg();
     641          14 :   LI.BaseRegIdx = BaseRegIdx;
     642          28 :   LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
     643          14 :   LI.IsPrePost = IsPrePost;
     644             :   return LI;
     645             : }
     646             : 
     647          18 : static Optional<unsigned> getTag(const TargetRegisterInfo *TRI,
     648             :                                  const MachineInstr &MI, const LoadInfo &LI) {
     649          36 :   unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
     650          36 :   unsigned Base = TRI->getEncodingValue(LI.BaseReg);
     651             :   unsigned Off;
     652          18 :   if (LI.OffsetOpnd == nullptr)
     653             :     Off = 0;
     654          54 :   else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
     655          18 :            LI.OffsetOpnd->isCPI())
     656             :     return None;
     657          18 :   else if (LI.OffsetOpnd->isReg())
     658           0 :     Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
     659             :   else
     660          18 :     Off = LI.OffsetOpnd->getImm() >> 2;
     661             : 
     662          36 :   return makeTag(Dest, Base, Off);
     663             : }
     664             : 
     665           4 : void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
     666             :   // Build the initial tag map for the whole loop.
     667           4 :   TagMap.clear();
     668          20 :   for (MachineBasicBlock *MBB : L.getBlocks())
     669          82 :     for (MachineInstr &MI : *MBB) {
     670          41 :       Optional<LoadInfo> LInfo = getLoadInfo(MI);
     671          33 :       if (!LInfo)
     672          50 :         continue;
     673          16 :       Optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
     674           8 :       if (!Tag)
     675             :         continue;
     676          16 :       TagMap[*Tag].push_back(&MI);
     677             :     }
     678             : 
     679           4 :   bool AnyCollisions = false;
     680          12 :   for (auto &P : TagMap) {
     681           6 :     auto Size = P.second.size();
     682           3 :     if (Size > 1) {
     683           3 :       for (auto *MI : P.second) {
     684           3 :         if (TII->isStridedAccess(*MI)) {
     685             :           AnyCollisions = true;
     686             :           break;
     687             :         }
     688             :       }
     689             :     }
     690           3 :     if (AnyCollisions)
     691             :       break;
     692             :   }
     693             :   // Nothing to fix.
     694           4 :   if (!AnyCollisions)
     695           1 :     return;
     696             : 
     697           3 :   MachineRegisterInfo &MRI = Fn.getRegInfo();
     698             : 
     699             :   // Go through all the basic blocks in the current loop and fix any streaming
     700             :   // loads to avoid collisions with any other loads.
     701           9 :   LiveRegUnits LR(*TRI);
     702          15 :   for (MachineBasicBlock *MBB : L.getBlocks()) {
     703           3 :     LR.clear();
     704           3 :     LR.addLiveOuts(*MBB);
     705          99 :     for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
     706          30 :       MachineInstr &MI = *I;
     707          30 :       if (!TII->isStridedAccess(MI))
     708          50 :         continue;
     709             : 
     710          18 :       LoadInfo LdI = *getLoadInfo(MI);
     711          18 :       unsigned OldTag = *getTag(TRI, MI, LdI);
     712          12 :       auto &OldCollisions = TagMap[OldTag];
     713          12 :       if (OldCollisions.size() <= 1)
     714           2 :         continue;
     715             : 
     716           4 :       bool Fixed = false;
     717             :       DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
     718             : 
     719          16 :       for (unsigned ScratchReg : AArch64::GPR64RegClass) {
     720          12 :         if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
     721           8 :           continue;
     722             : 
     723           4 :         LoadInfo NewLdI(LdI);
     724           4 :         NewLdI.BaseReg = ScratchReg;
     725          12 :         unsigned NewTag = *getTag(TRI, MI, NewLdI);
     726             :         // Scratch reg tag would collide too, so don't use it.
     727           8 :         if (TagMap.count(NewTag))
     728           0 :           continue;
     729             : 
     730             :         DEBUG(dbgs() << "Changing base reg to: " << PrintReg(ScratchReg, TRI)
     731             :                      << '\n');
     732             : 
     733             :         // Rewrite:
     734             :         //   Xd = LOAD Xb, off
     735             :         // to:
     736             :         //   Xc = MOV Xb
     737             :         //   Xd = LOAD Xc, off
     738          12 :         DebugLoc DL = MI.getDebugLoc();
     739          16 :         BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
     740           4 :             .addReg(AArch64::XZR)
     741           4 :             .addReg(LdI.BaseReg)
     742           4 :             .addImm(0);
     743           8 :         MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
     744           4 :         BaseOpnd.setReg(ScratchReg);
     745             : 
     746             :         // If the load does a pre/post increment, then insert a MOV after as
     747             :         // well to update the real base register.
     748           4 :         if (LdI.IsPrePost) {
     749             :           DEBUG(dbgs() << "Doing post MOV of incremented reg: "
     750             :                        << PrintReg(ScratchReg, TRI) << '\n');
     751           1 :           MI.getOperand(0).setReg(
     752             :               ScratchReg); // Change tied operand pre/post update dest.
     753           2 :           BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
     754           4 :                   TII->get(AArch64::ORRXrs), LdI.BaseReg)
     755           1 :               .addReg(AArch64::XZR)
     756           1 :               .addReg(ScratchReg)
     757           1 :               .addImm(0);
     758             :         }
     759             : 
     760          10 :         for (int I = 0, E = OldCollisions.size(); I != E; ++I)
     761          12 :           if (OldCollisions[I] == &MI) {
     762          16 :             std::swap(OldCollisions[I], OldCollisions[E - 1]);
     763           4 :             OldCollisions.pop_back();
     764             :             break;
     765             :           }
     766             : 
     767             :         // Update TagMap to reflect instruction changes to reduce the number
     768             :         // of later MOVs to be inserted.  This needs to be done after
     769             :         // OldCollisions is updated since it may be relocated by this
     770             :         // insertion.
     771           8 :         TagMap[NewTag].push_back(&MI);
     772           4 :         ++NumCollisionsAvoided;
     773           4 :         Fixed = true;
     774           4 :         Modified = true;
     775             :         break;
     776             :       }
     777             :       if (!Fixed)
     778             :         ++NumCollisionsNotAvoided;
     779             :     }
     780             :   }
     781             : }
     782             : 
     783       11001 : bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
     784       11001 :   auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
     785       11001 :   if (ST.getProcFamily() != AArch64Subtarget::Falkor)
     786             :     return false;
     787             : 
     788          50 :   if (skipFunction(*Fn.getFunction()))
     789             :     return false;
     790             : 
     791          50 :   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
     792          50 :   TRI = ST.getRegisterInfo();
     793             : 
     794             :   assert(TRI->trackLivenessAfterRegAlloc(Fn) &&
     795             :          "Register liveness not available!");
     796             : 
     797          50 :   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
     798             : 
     799          50 :   Modified = false;
     800             : 
     801         204 :   for (MachineLoop *I : LI)
     802          20 :     for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
     803             :       // Only process inner-loops
     804           8 :       if (L->empty())
     805           4 :         runOnLoop(**L, Fn);
     806             : 
     807          50 :   return Modified;
     808             : }
     809             : 
     810         914 : FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); }

Generated by: LCOV version 1.13