LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64StorePairSuppress.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 41 44 93.2 %
Date: 2018-10-20 13:21:21 Functions: 8 9 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass identifies floating point stores that should not be combined into
      11             : // store pairs. Later we may do the same for floating point loads.
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64InstrInfo.h"
      15             : #include "llvm/CodeGen/MachineFunction.h"
      16             : #include "llvm/CodeGen/MachineFunctionPass.h"
      17             : #include "llvm/CodeGen/MachineInstr.h"
      18             : #include "llvm/CodeGen/MachineTraceMetrics.h"
      19             : #include "llvm/CodeGen/TargetInstrInfo.h"
      20             : #include "llvm/CodeGen/TargetSchedule.h"
      21             : #include "llvm/Support/Debug.h"
      22             : #include "llvm/Support/raw_ostream.h"
      23             : 
      24             : using namespace llvm;
      25             : 
      26             : #define DEBUG_TYPE "aarch64-stp-suppress"
      27             : 
      28             : #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
      29             : 
      30             : namespace {
      31             : class AArch64StorePairSuppress : public MachineFunctionPass {
      32             :   const AArch64InstrInfo *TII;
      33             :   const TargetRegisterInfo *TRI;
      34             :   const MachineRegisterInfo *MRI;
      35             :   TargetSchedModel SchedModel;
      36             :   MachineTraceMetrics *Traces;
      37             :   MachineTraceMetrics::Ensemble *MinInstr;
      38             : 
      39             : public:
      40             :   static char ID;
      41        1117 :   AArch64StorePairSuppress() : MachineFunctionPass(ID) {
      42        1117 :     initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
      43        1117 :   }
      44             : 
      45        1111 :   StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
      46             : 
      47             :   bool runOnMachineFunction(MachineFunction &F) override;
      48             : 
      49             : private:
      50             :   bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
      51             : 
      52             :   bool isNarrowFPStore(const MachineInstr &MI);
      53             : 
      54        1104 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      55        1104 :     AU.setPreservesCFG();
      56             :     AU.addRequired<MachineTraceMetrics>();
      57             :     AU.addPreserved<MachineTraceMetrics>();
      58        1104 :     MachineFunctionPass::getAnalysisUsage(AU);
      59        1104 :   }
      60             : };
      61             : char AArch64StorePairSuppress::ID = 0;
      62             : } // anonymous
      63             : 
      64      200149 : INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress",
      65             :                 STPSUPPRESS_PASS_NAME, false, false)
      66             : 
      67        1117 : FunctionPass *llvm::createAArch64StorePairSuppressPass() {
      68        1117 :   return new AArch64StorePairSuppress();
      69             : }
      70             : 
      71             : /// Return true if an STP can be added to this block without increasing the
      72             : /// critical resource height. STP is good to form in Ld/St limited blocks and
      73             : /// bad to form in floating-point limited blocks. This is true independent of
      74             : /// the critical path. If the critical path is longer than the resource height,
      75             : /// the extra vector ops can limit physreg renaming. Otherwise, it could simply
      76             : /// oversaturate the vector units.
      77          15 : bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
      78          15 :   if (!MinInstr)
      79          15 :     MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
      80             : 
      81          15 :   MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
      82          15 :   unsigned ResLength = BBTrace.getResourceLength();
      83             : 
      84             :   // Get the machine model's scheduling class for STPDi.
      85             :   // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
      86          15 :   unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
      87             :   const MCSchedClassDesc *SCDesc =
      88          30 :       SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
      89             : 
      90             :   // If a subtarget does not define resources for STPDi, bail here.
      91          15 :   if (SCDesc->isValid() && !SCDesc->isVariant()) {
      92          15 :     unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc);
      93          15 :     if (ResLenWithSTP > ResLength) {
      94             :       LLVM_DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
      95             :                         << " resources " << ResLength << " -> " << ResLenWithSTP
      96             :                         << "\n");
      97           9 :       return false;
      98             :     }
      99             :   }
     100             :   return true;
     101             : }
     102             : 
     103             : /// Return true if this is a floating-point store smaller than the V reg. On
     104             : /// Cyclone, these require a vector shuffle before storing a pair.
     105             : /// Ideally we would call getMatchingPairOpcode() and have the machine model
     106             : /// tell us if it's profitable with no cpu knowledge here.
     107             : ///
     108             : /// FIXME: We plan to develop a decent Target abstraction for simple loads and
     109             : /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
     110           0 : bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
     111       16758 :   switch (MI.getOpcode()) {
     112             :   default:
     113             :     return false;
     114           0 :   case AArch64::STRSui:
     115             :   case AArch64::STRDui:
     116             :   case AArch64::STURSi:
     117             :   case AArch64::STURDi:
     118           0 :     return true;
     119             :   }
     120             : }
     121             : 
     122       14056 : bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
     123       14056 :   if (skipFunction(MF.getFunction()))
     124             :     return false;
     125             : 
     126       14050 :   const TargetSubtargetInfo &ST = MF.getSubtarget();
     127       14050 :   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
     128       14050 :   TRI = ST.getRegisterInfo();
     129       14050 :   MRI = &MF.getRegInfo();
     130       14050 :   SchedModel.init(&ST);
     131       14050 :   Traces = &getAnalysis<MachineTraceMetrics>();
     132       14050 :   MinInstr = nullptr;
     133             : 
     134             :   LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
     135             : 
     136       14050 :   if (!SchedModel.hasInstrSchedModel()) {
     137             :     LLVM_DEBUG(dbgs() << "  Skipping pass: no machine model present.\n");
     138             :     return false;
     139             :   }
     140             : 
     141             :   // Check for a sequence of stores to the same base address. We don't need to
     142             :   // precisely determine whether a store pair can be formed. But we do want to
     143             :   // filter out most situations where we can't form store pairs to avoid
     144             :   // computing trace metrics in those cases.
     145        4328 :   for (auto &MBB : MF) {
     146             :     bool SuppressSTP = false;
     147             :     unsigned PrevBaseReg = 0;
     148       19066 :     for (auto &MI : MBB) {
     149             :       if (!isNarrowFPStore(MI))
     150       16668 :         continue;
     151             :       unsigned BaseReg;
     152             :       int64_t Offset;
     153          90 :       if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) {
     154          75 :         if (PrevBaseReg == BaseReg) {
     155             :           // If this block can take STPs, skip ahead to the next block.
     156          15 :           if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
     157             :             break;
     158             :           // Otherwise, continue unpairing the stores in this block.
     159             :           LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n");
     160             :           SuppressSTP = true;
     161           9 :           TII->suppressLdStPair(MI);
     162             :         }
     163          69 :         PrevBaseReg = BaseReg;
     164             :       } else
     165             :         PrevBaseReg = 0;
     166             :     }
     167             :   }
     168             :   // This pass just sets some internal MachineMemOperand flags. It can't really
     169             :   // invalidate anything.
     170             :   return false;
     171             : }

Generated by: LCOV version 1.13