LCOV - code coverage report
Current view: top level - lib/Target/ARM - A15SDOptimizer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 224 265 84.5 %
Date: 2017-09-14 15:23:50 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 =====//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // The Cortex-A15 processor employs a tracking scheme in its register renaming
      11             : // in order to process each instruction's micro-ops speculatively and
      12             : // out-of-order with appropriate forwarding. The ARM architecture allows VFP
      13             : // instructions to read and write 32-bit S-registers.  Each S-register
      14             : // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
      15             : //
      16             : // There are several instruction patterns which can be used to provide this
      17             : // capability which can provide higher performance than other, potentially more
      18             : // direct patterns, specifically around when one micro-op reads a D-register
      19             : // operand that has recently been written as one or more S-register results.
      20             : //
      21             : // This file defines a pre-regalloc pass which looks for SPR producers which
      22             : // are going to be used by DPR (or QPR) consumers and creates the more
      23             : // optimized access pattern.
      24             : //
      25             : //===----------------------------------------------------------------------===//
      26             : 
      27             : #include "ARM.h"
      28             : #include "ARMBaseInstrInfo.h"
      29             : #include "ARMBaseRegisterInfo.h"
      30             : #include "ARMSubtarget.h"
      31             : #include "llvm/ADT/Statistic.h"
      32             : #include "llvm/CodeGen/MachineFunction.h"
      33             : #include "llvm/CodeGen/MachineFunctionPass.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      37             : #include "llvm/Support/Debug.h"
      38             : #include "llvm/Support/raw_ostream.h"
      39             : #include "llvm/Target/TargetRegisterInfo.h"
      40             : #include "llvm/Target/TargetSubtargetInfo.h"
      41             : #include <map>
      42             : #include <set>
      43             : 
      44             : using namespace llvm;
      45             : 
      46             : #define DEBUG_TYPE "a15-sd-optimizer"
      47             : 
      48             : namespace {
      49        9176 :   struct A15SDOptimizer : public MachineFunctionPass {
      50             :     static char ID;
      51        6963 :     A15SDOptimizer() : MachineFunctionPass(ID) {}
      52             : 
      53             :     bool runOnMachineFunction(MachineFunction &Fn) override;
      54             : 
      55        2314 :     StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
      56             : 
      57             :   private:
      58             :     const ARMBaseInstrInfo *TII;
      59             :     const TargetRegisterInfo *TRI;
      60             :     MachineRegisterInfo *MRI;
      61             : 
      62             :     bool runOnInstruction(MachineInstr *MI);
      63             : 
      64             :     //
      65             :     // Instruction builder helpers
      66             :     //
      67             :     unsigned createDupLane(MachineBasicBlock &MBB,
      68             :                            MachineBasicBlock::iterator InsertBefore,
      69             :                            const DebugLoc &DL, unsigned Reg, unsigned Lane,
      70             :                            bool QPR = false);
      71             : 
      72             :     unsigned createExtractSubreg(MachineBasicBlock &MBB,
      73             :                                  MachineBasicBlock::iterator InsertBefore,
      74             :                                  const DebugLoc &DL, unsigned DReg,
      75             :                                  unsigned Lane, const TargetRegisterClass *TRC);
      76             : 
      77             :     unsigned createVExt(MachineBasicBlock &MBB,
      78             :                         MachineBasicBlock::iterator InsertBefore,
      79             :                         const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
      80             : 
      81             :     unsigned createRegSequence(MachineBasicBlock &MBB,
      82             :                                MachineBasicBlock::iterator InsertBefore,
      83             :                                const DebugLoc &DL, unsigned Reg1,
      84             :                                unsigned Reg2);
      85             : 
      86             :     unsigned createInsertSubreg(MachineBasicBlock &MBB,
      87             :                                 MachineBasicBlock::iterator InsertBefore,
      88             :                                 const DebugLoc &DL, unsigned DReg,
      89             :                                 unsigned Lane, unsigned ToInsert);
      90             : 
      91             :     unsigned createImplicitDef(MachineBasicBlock &MBB,
      92             :                                MachineBasicBlock::iterator InsertBefore,
      93             :                                const DebugLoc &DL);
      94             : 
      95             :     //
      96             :     // Various property checkers
      97             :     //
      98             :     bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
      99             :     bool hasPartialWrite(MachineInstr *MI);
     100             :     SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
     101             :     unsigned getDPRLaneFromSPR(unsigned SReg);
     102             : 
     103             :     //
     104             :     // Methods used for getting the definitions of partial registers
     105             :     //
     106             : 
     107             :     MachineInstr *elideCopies(MachineInstr *MI);
     108             :     void elideCopiesAndPHIs(MachineInstr *MI,
     109             :                             SmallVectorImpl<MachineInstr*> &Outs);
     110             : 
     111             :     //
     112             :     // Pattern optimization methods
     113             :     //
     114             :     unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
     115             :     unsigned optimizeSDPattern(MachineInstr *MI);
     116             :     unsigned getPrefSPRLane(unsigned SReg);
     117             : 
     118             :     //
     119             :     // Sanitizing method - used to make sure if don't leave dead code around.
     120             :     //
     121             :     void eraseInstrWithNoUses(MachineInstr *MI);
     122             : 
     123             :     //
     124             :     // A map used to track the changes done by this pass.
     125             :     //
     126             :     std::map<MachineInstr*, unsigned> Replacements;
     127             :     std::set<MachineInstr *> DeadInstr;
     128             :   };
     129             :   char A15SDOptimizer::ID = 0;
     130             : } // end anonymous namespace
     131             : 
     132             : // Returns true if this is a use of a SPR register.
     133        3200 : bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
     134             :                                   const TargetRegisterClass *TRC) {
     135        3200 :   if (!MO.isReg())
     136             :     return false;
     137        3200 :   unsigned Reg = MO.getReg();
     138             : 
     139        3200 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
     140        4017 :     return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
     141             :   else
     142        1861 :     return TRC->contains(Reg);
     143             : }
     144             : 
     145             : unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
     146             :   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
     147           2 :                                            &ARM::DPRRegClass);
     148           1 :   if (DReg != ARM::NoRegister) return ARM::ssub_1;
     149             :   return ARM::ssub_0;
     150             : }
     151             : 
     152             : // Get the subreg type that is most likely to be coalesced
     153             : // for an SPR register that will be used in VDUP32d pseudo.
     154           5 : unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
     155           5 :   if (!TRI->isVirtualRegister(SReg))
     156           0 :     return getDPRLaneFromSPR(SReg);
     157             : 
     158           5 :   MachineInstr *MI = MRI->getVRegDef(SReg);
     159           5 :   if (!MI) return ARM::ssub_0;
     160           5 :   MachineOperand *MO = MI->findRegisterDefOperand(SReg);
     161             : 
     162             :   assert(MO->isReg() && "Non-register operand found!");
     163           5 :   if (!MO) return ARM::ssub_0;
     164             : 
     165           6 :   if (MI->isCopy() && usesRegClass(MI->getOperand(1),
     166             :                                     &ARM::SPRRegClass)) {
     167           1 :     SReg = MI->getOperand(1).getReg();
     168             :   }
     169             : 
     170           5 :   if (TargetRegisterInfo::isVirtualRegister(SReg)) {
     171           4 :     if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
     172           4 :     return ARM::ssub_0;
     173             :   }
     174           1 :   return getDPRLaneFromSPR(SReg);
     175             : }
     176             : 
     177             : // MI is known to be dead. Figure out what instructions
     178             : // are also made dead by this and mark them for removal.
     179           5 : void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
     180          10 :   SmallVector<MachineInstr *, 8> Front;
     181          10 :   DeadInstr.insert(MI);
     182             : 
     183             :   DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
     184           5 :   Front.push_back(MI);
     185             : 
     186          10 :   while (Front.size() != 0) {
     187          10 :     MI = Front.back();
     188           5 :     Front.pop_back();
     189             : 
     190             :     // MI is already known to be dead. We need to see
     191             :     // if other instructions can also be removed.
     192          38 :     for (MachineOperand &MO : MI->operands()) {
     193          45 :       if ((!MO.isReg()) || (!MO.isUse()))
     194          44 :         continue;
     195          12 :       unsigned Reg = MO.getReg();
     196          12 :       if (!TRI->isVirtualRegister(Reg))
     197           0 :         continue;
     198          12 :       MachineOperand *Op = MI->findRegisterDefOperand(Reg);
     199             : 
     200           0 :       if (!Op)
     201          12 :         continue;
     202             : 
     203           0 :       MachineInstr *Def = Op->getParent();
     204             : 
     205             :       // We don't need to do anything if we have already marked
     206             :       // this instruction as being dead.
     207           0 :       if (DeadInstr.find(Def) != DeadInstr.end())
     208           0 :         continue;
     209             : 
     210             :       // Check if all the uses of this instruction are marked as
     211             :       // dead. If so, we can also mark this instruction as being
     212             :       // dead.
     213           0 :       bool IsDead = true;
     214           0 :       for (MachineOperand &MODef : Def->operands()) {
     215           0 :         if ((!MODef.isReg()) || (!MODef.isDef()))
     216           0 :           continue;
     217           0 :         unsigned DefReg = MODef.getReg();
     218           0 :         if (!TRI->isVirtualRegister(DefReg)) {
     219             :           IsDead = false;
     220             :           break;
     221             :         }
     222           0 :         for (MachineInstr &Use : MRI->use_instructions(Reg)) {
     223             :           // We don't care about self references.
     224           0 :           if (&Use == Def)
     225           0 :             continue;
     226           0 :           if (DeadInstr.find(&Use) == DeadInstr.end()) {
     227             :             IsDead = false;
     228             :             break;
     229             :           }
     230             :         }
     231             :       }
     232             : 
     233           0 :       if (!IsDead) continue;
     234             : 
     235             :       DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
     236           0 :       DeadInstr.insert(Def);
     237             :     }
     238             :   }
     239           5 : }
     240             : 
     241             : // Creates the more optimized patterns and generally does all the code
     242             : // transformations in this pass.
     243           7 : unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
     244           7 :   if (MI->isCopy()) {
     245           0 :     return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
     246             :   }
     247             : 
     248           7 :   if (MI->isInsertSubreg()) {
     249           2 :     unsigned DPRReg = MI->getOperand(1).getReg();
     250           2 :     unsigned SPRReg = MI->getOperand(2).getReg();
     251             : 
     252           4 :     if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
     253           2 :       MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
     254           2 :       MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
     255             : 
     256           2 :       if (DPRMI && SPRMI) {
     257             :         // See if the first operand of this insert_subreg is IMPLICIT_DEF
     258           2 :         MachineInstr *ECDef = elideCopies(DPRMI);
     259           3 :         if (ECDef && ECDef->isImplicitDef()) {
     260             :           // Another corner case - if we're inserting something that is purely
     261             :           // a subreg copy of a DPR, just use that DPR.
     262             : 
     263           1 :           MachineInstr *EC = elideCopies(SPRMI);
     264             :           // Is it a subreg copy of ssub_0?
     265           2 :           if (EC && EC->isCopy() &&
     266           0 :               EC->getOperand(1).getSubReg() == ARM::ssub_0) {
     267             :             DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
     268             : 
     269             :             // Find the thing we're subreg copying out of - is it of the same
     270             :             // regclass as DPRMI? (i.e. a DPR or QPR).
     271           0 :             unsigned FullReg = SPRMI->getOperand(1).getReg();
     272             :             const TargetRegisterClass *TRC =
     273           0 :               MRI->getRegClass(MI->getOperand(1).getReg());
     274           0 :             if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
     275             :               DEBUG(dbgs() << "Subreg copy is compatible - returning ");
     276             :               DEBUG(dbgs() << PrintReg(FullReg) << "\n");
     277           0 :               eraseInstrWithNoUses(MI);
     278           0 :               return FullReg;
     279             :             }
     280             :           }
     281             : 
     282           1 :           return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
     283             :         }
     284             :       }
     285             :     }
     286           1 :     return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
     287             :   }
     288             : 
     289          10 :   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
     290             :                                           &ARM::SPRRegClass)) {
     291             :     // See if all bar one of the operands are IMPLICIT_DEF and insert the
     292             :     // optimizer pattern accordingly.
     293             :     unsigned NumImplicit = 0, NumTotal = 0;
     294             :     unsigned NonImplicitReg = ~0U;
     295             : 
     296          61 :     for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
     297          84 :       if (!MI->getOperand(I).isReg())
     298          14 :         continue;
     299          14 :       ++NumTotal;
     300          14 :       unsigned OpReg = MI->getOperand(I).getReg();
     301             : 
     302          14 :       if (!TRI->isVirtualRegister(OpReg))
     303             :         break;
     304             : 
     305          14 :       MachineInstr *Def = MRI->getVRegDef(OpReg);
     306          14 :       if (!Def)
     307             :         break;
     308          14 :       if (Def->isImplicitDef())
     309           8 :         ++NumImplicit;
     310             :       else
     311          12 :         NonImplicitReg = MI->getOperand(I).getReg();
     312             :     }
     313             : 
     314           5 :     if (NumImplicit == NumTotal - 1)
     315           4 :       return optimizeAllLanesPattern(MI, NonImplicitReg);
     316             :     else
     317           1 :       return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
     318             :   }
     319             : 
     320           0 :   llvm_unreachable("Unhandled update pattern!");
     321             : }
     322             : 
     323             : // Return true if this MachineInstr inserts a scalar (SPR) value into
     324             : // a D or Q register.
     325          37 : bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
     326             :   // The only way we can do a partial register update is through a COPY,
     327             :   // INSERT_SUBREG or REG_SEQUENCE.
     328          40 :   if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
     329             :     return true;
     330             : 
     331          39 :   if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
     332             :                                            &ARM::SPRRegClass))
     333             :     return true;
     334             : 
     335          41 :   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
     336             :     return true;
     337             : 
     338             :   return false;
     339             : }
     340             : 
     341             : // Looks through full copies to get the instruction that defines the input
     342             : // operand for MI.
     343           3 : MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
     344           1 :   if (!MI->isFullCopy())
     345             :     return MI;
     346           2 :   if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
     347             :     return nullptr;
     348           0 :   MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
     349           0 :   if (!Def)
     350             :     return nullptr;
     351             :   return elideCopies(Def);
     352             : }
     353             : 
     354             : // Look through full copies and PHIs to get the set of non-copy MachineInstrs
     355             : // that can produce MI.
     356          52 : void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
     357             :                                         SmallVectorImpl<MachineInstr*> &Outs) {
     358             :    // Looking through PHIs may create loops so we need to track what
     359             :    // instructions we have visited before.
     360         104 :    std::set<MachineInstr *> Reached;
     361         104 :    SmallVector<MachineInstr *, 8> Front;
     362          52 :    Front.push_back(MI);
     363         108 :    while (Front.size() != 0) {
     364         112 :      MI = Front.back();
     365          56 :      Front.pop_back();
     366             : 
     367             :      // If we have already explored this MachineInstr, ignore it.
     368         112 :      if (Reached.find(MI) != Reached.end())
     369             :        continue;
     370          56 :      Reached.insert(MI);
     371         112 :      if (MI->isPHI()) {
     372           0 :        for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
     373           0 :          unsigned Reg = MI->getOperand(I).getReg();
     374           0 :          if (!TRI->isVirtualRegister(Reg)) {
     375           0 :            continue;
     376             :          }
     377           0 :          MachineInstr *NewMI = MRI->getVRegDef(Reg);
     378           0 :          if (!NewMI)
     379             :            continue;
     380           0 :          Front.push_back(NewMI);
     381             :        }
     382          70 :      } else if (MI->isFullCopy()) {
     383          28 :        if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
     384          10 :          continue;
     385           4 :        MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
     386           4 :        if (!NewMI)
     387             :          continue;
     388           4 :        Front.push_back(NewMI);
     389             :      } else {
     390             :        DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
     391          42 :        Outs.push_back(MI);
     392             :      }
     393             :    }
     394          52 : }
     395             : 
     396             : // Return the DPR virtual registers that are read by this machine instruction
     397             : // (if any).
     398         664 : SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
     399        2155 :   if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
     400         534 :       MI->isKill())
     401             :     return SmallVector<unsigned, 8>();
     402             : 
     403         534 :   SmallVector<unsigned, 8> Defs;
     404        6036 :   for (MachineOperand &MO : MI->operands()) {
     405        5390 :     if (!MO.isReg() || !MO.isUse())
     406        1399 :       continue;
     407        3171 :     if (!usesRegClass(MO, &ARM::DPRRegClass) &&
     408        3175 :         !usesRegClass(MO, &ARM::QPRRegClass) &&
     409        1032 :         !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
     410        1028 :       continue;
     411             : 
     412          57 :     Defs.push_back(MO.getReg());
     413             :   }
     414         534 :   return Defs;
     415             : }
     416             : 
     417             : // Creates a DPR register from an SPR one by using a VDUP.
     418          13 : unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
     419             :                                        MachineBasicBlock::iterator InsertBefore,
     420             :                                        const DebugLoc &DL, unsigned Reg,
     421             :                                        unsigned Lane, bool QPR) {
     422          13 :   unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
     423          13 :                                                   &ARM::DPRRegClass);
     424          26 :   BuildMI(MBB, InsertBefore, DL,
     425          26 :           TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
     426          13 :       .addReg(Reg)
     427          26 :       .addImm(Lane)
     428          39 :       .add(predOps(ARMCC::AL));
     429             : 
     430          13 :   return Out;
     431             : }
     432             : 
     433             : // Creates a SPR register from a DPR by copying the value in lane 0.
     434           4 : unsigned A15SDOptimizer::createExtractSubreg(
     435             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     436             :     const DebugLoc &DL, unsigned DReg, unsigned Lane,
     437             :     const TargetRegisterClass *TRC) {
     438           4 :   unsigned Out = MRI->createVirtualRegister(TRC);
     439           8 :   BuildMI(MBB,
     440             :           InsertBefore,
     441             :           DL,
     442           8 :           TII->get(TargetOpcode::COPY), Out)
     443           4 :     .addReg(DReg, 0, Lane);
     444             : 
     445           4 :   return Out;
     446             : }
     447             : 
     448             : // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
     449           2 : unsigned A15SDOptimizer::createRegSequence(
     450             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     451             :     const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
     452           2 :   unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
     453           4 :   BuildMI(MBB,
     454             :           InsertBefore,
     455             :           DL,
     456           4 :           TII->get(TargetOpcode::REG_SEQUENCE), Out)
     457           2 :     .addReg(Reg1)
     458           2 :     .addImm(ARM::dsub_0)
     459           2 :     .addReg(Reg2)
     460           2 :     .addImm(ARM::dsub_1);
     461           2 :   return Out;
     462             : }
     463             : 
     464             : // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
     465             : // and merges them into one DPR register.
     466           4 : unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
     467             :                                     MachineBasicBlock::iterator InsertBefore,
     468             :                                     const DebugLoc &DL, unsigned Ssub0,
     469             :                                     unsigned Ssub1) {
     470           4 :   unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
     471          12 :   BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
     472           4 :       .addReg(Ssub0)
     473           4 :       .addReg(Ssub1)
     474           4 :       .addImm(1)
     475          12 :       .add(predOps(ARMCC::AL));
     476           4 :   return Out;
     477             : }
     478             : 
     479           5 : unsigned A15SDOptimizer::createInsertSubreg(
     480             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     481             :     const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
     482           5 :   unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
     483          10 :   BuildMI(MBB,
     484             :           InsertBefore,
     485             :           DL,
     486          10 :           TII->get(TargetOpcode::INSERT_SUBREG), Out)
     487           5 :     .addReg(DReg)
     488           5 :     .addReg(ToInsert)
     489          10 :     .addImm(Lane);
     490             : 
     491           5 :   return Out;
     492             : }
     493             : 
     494             : unsigned
     495           5 : A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
     496             :                                   MachineBasicBlock::iterator InsertBefore,
     497             :                                   const DebugLoc &DL) {
     498           5 :   unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
     499           5 :   BuildMI(MBB,
     500             :           InsertBefore,
     501             :           DL,
     502          10 :           TII->get(TargetOpcode::IMPLICIT_DEF), Out);
     503           5 :   return Out;
     504             : }
     505             : 
     506             : // This function inserts instructions in order to optimize interactions between
     507             : // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
     508             : // lanes, and the using VEXT instructions to recompose the result.
     509             : unsigned
     510           7 : A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
     511           7 :   MachineBasicBlock::iterator InsertPt(MI);
     512          21 :   DebugLoc DL = MI->getDebugLoc();
     513           7 :   MachineBasicBlock &MBB = *MI->getParent();
     514          14 :   InsertPt++;
     515             :   unsigned Out;
     516             : 
     517             :   // DPair has the same length as QPR and also has two DPRs as subreg.
     518             :   // Treat DPair as QPR.
     519          26 :   if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
     520          15 :       MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
     521             :     unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
     522           2 :                                          ARM::dsub_0, &ARM::DPRRegClass);
     523             :     unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
     524           2 :                                          ARM::dsub_1, &ARM::DPRRegClass);
     525             : 
     526           2 :     unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
     527           2 :     unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
     528           2 :     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
     529             : 
     530           2 :     unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
     531           2 :     unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
     532           2 :     Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
     533             : 
     534           2 :     Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
     535             : 
     536          15 :   } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
     537           0 :     unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
     538           0 :     unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
     539           0 :     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
     540             : 
     541             :   } else {
     542             :     assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
     543             :            "Found unexpected regclass!");
     544             : 
     545           5 :     unsigned PrefLane = getPrefSPRLane(Reg);
     546             :     unsigned Lane;
     547           5 :     switch (PrefLane) {
     548             :       case ARM::ssub_0: Lane = 0; break;
     549           0 :       case ARM::ssub_1: Lane = 1; break;
     550           0 :       default: llvm_unreachable("Unknown preferred lane!");
     551             :     }
     552             : 
     553             :     // Treat DPair as QPR
     554           8 :     bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
     555           8 :                    usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
     556             : 
     557           5 :     Out = createImplicitDef(MBB, InsertPt, DL);
     558           5 :     Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
     559           5 :     Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
     560           5 :     eraseInstrWithNoUses(MI);
     561             :   }
     562          14 :   return Out;
     563             : }
     564             : 
     565         664 : bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
                         // Summary: for every D-register read by MI, trace its definition back
                         // through COPYs and PHIs; for each source instruction found for which
                         // hasPartialWrite() is true, call optimizeSDPattern() and redirect the
                         // already-existing uses of that instruction's operand-0 register to the
                         // register it returns. Returns true if optimizeSDPattern() returned a
                         // non-zero register for at least one source instruction.
                         //
     566             :   // We look for instructions that write S registers that are then read as
     567             :   // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
     568             :   // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
     569             :   // merge two SPR values to form a DPR register.  In order to avoid false
     570             :   // positives we make sure that there is an SPR producer so we look past
     571             :   // COPY and PHI nodes to find it.
     572             :   //
     573             :   // The best code pattern for when an SPR producer is going to be used by a
     574             :   // DPR or QPR consumer depends on whether the other lanes of the
     575             :   // corresponding DPR/QPR are currently defined.
     576             :   //
     577             :   // We can handle these efficiently, depending on the type of
     578             :   // pseudo-instruction that is producing the pattern:
     579             :   //
     580             :   //   * COPY:          * VDUP all lanes and merge the results together
     581             :   //                      using VEXTs.
     582             :   //
     583             :   //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
     584             :   //                      lane, and the other lane(s) of the DPR/QPR register
     585             :   //                      that we are inserting in are undefined, use the
     586             :   //                      original DPR/QPR value.
     587             :   //                    * Otherwise, fall back on the same strategy as COPY.
     588             :   //
     589             :   //   * REG_SEQUENCE:  * If all except one of the input operands are
     590             :   //                      IMPLICIT_DEFs, insert the VDUP pattern for just the
     591             :   //                      defined input operand.
     592             :   //                    * Otherwise, fall back on the same strategy as COPY.
     593             :   //
     594             : 
     595             :   // First, get all the reads of D-registers done by this instruction.
     596        1328 :   SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
     597         664 :   bool Modified = false;
     598             : 
     599        1385 :   for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
     600         721 :      I != E; ++I) {
     601             :     // Follow the def-use chain for this DPR through COPYs, and also through
     602             :     // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
     603             :     // we can end up with multiple defs of this DPR.
     604             : 
     605         109 :     SmallVector<MachineInstr *, 8> DefSrcs;
                           // Only virtual registers are traced; anything else is skipped.
     606         114 :     if (!TRI->isVirtualRegister(*I))
     607          10 :       continue;
     608          52 :     MachineInstr *Def = MRI->getVRegDef(*I);
                           // No defining instruction was returned for this register, so there
                           // is nothing to trace.
     609          52 :     if (!Def)
     610           0 :       continue;
     611             : 
     612          52 :     elideCopiesAndPHIs(Def, DefSrcs);
     613             : 
                           // NOTE: the loop variable below shadows the function parameter MI.
                           // Inside this loop, MI is a source instruction collected in DefSrcs,
                           // not the instruction this function was called with.
     614         198 :     for (MachineInstr *MI : DefSrcs) {
     615             :       // If we've already analyzed and replaced this operand, don't do
     616             :       // anything.
     617         126 :       if (Replacements.find(MI) != Replacements.end())
     618          40 :         continue;
     619             : 
     620             :       // Now, work out if the instruction causes a SPR->DPR dependency.
     621          37 :       if (!hasPartialWrite(MI))
     622          30 :         continue;
     623             : 
     624             :       // Collect all the uses of this MI's DPR def for updating later.
                             // The snapshot is taken before optimizeSDPattern() runs, so only
                             // uses that exist at this point are rewritten below (presumably to
                             // leave any uses made by newly inserted instructions alone --
                             // TODO confirm). The iterators I/E declared here shadow the outer
                             // loop's I/E.
     625          14 :       SmallVector<MachineOperand*, 8> Uses;
     626           7 :       unsigned DPRDefReg = MI->getOperand(0).getReg();
     627           7 :       for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
     628          28 :              E = MRI->use_end(); I != E; ++I)
     629          14 :         Uses.push_back(&*I);
     630             : 
     631             :       // We can optimize this.
     632           7 :       unsigned NewReg = optimizeSDPattern(MI);
     633             : 
                             // A result of 0 is treated as "no replacement register produced";
                             // in that case no uses are rewritten.
     634           7 :       if (NewReg != 0) {
     635           7 :         Modified = true;
     636          21 :         for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
     637           7 :                E = Uses.end(); I != E; ++I) {
     638             :           // Make sure to constrain the register class of the new register to
     639             :           // match what we're replacing. Otherwise we can optimize a DPR_VFP2
     640             :           // reference into a plain DPR, and that will end poorly. NewReg is
     641             :           // always virtual here, so there will always be a matching subclass
     642             :           // to find.
     643          28 :           MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
     644             : 
     645             :           DEBUG(dbgs() << "Replacing operand "
     646             :                        << **I << " with "
     647             :                        << PrintReg(NewReg) << "\n");
                                 // Point this use operand at NewReg, passing 0 as the
                                 // sub-register index argument.
     648          14 :           (*I)->substVirtReg(NewReg, 0, *TRI);
     649             :         }
     650             :       }
                             // Recorded even when NewReg is 0, so the Replacements lookup at the
                             // top of this loop skips this instruction on any later visit.
     651           7 :       Replacements[MI] = NewReg;
     652             :     }
     653             :   }
     654        1328 :   return Modified;
     655             : }
     656             : 
     657       10842 : bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
                         // Runs runOnInstruction() on every instruction of every basic block in
                         // Fn, then erases the instructions accumulated in DeadInstr. Returns
                         // true if any runOnInstruction() call reported a modification. Returns
                         // false without touching Fn when skipFunction() says to skip it or
                         // when the subtarget is not a Cortex-A15 with NEON.
     658       10842 :   if (skipFunction(*Fn.getFunction()))
     659             :     return false;
     660             : 
     661       10834 :   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
     662             :   // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
     663             :   // enabled when NEON is available.
     664       10834 :   if (!(STI.isCortexA15() && STI.hasNEON()))
     665             :     return false;
                         // Cache the target/register info used by the rest of the pass for this
                         // function.
     666          46 :   TII = STI.getInstrInfo();
     667          46 :   TRI = STI.getRegisterInfo();
     668          46 :   MRI = &Fn.getRegInfo();
     669          46 :   bool Modified = false;
     670             : 
     671             :   DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
     672             : 
                         // Reset per-function state so nothing carries over from a previously
                         // processed function.
     673          92 :   DeadInstr.clear();
     674          92 :   Replacements.clear();
     675             : 
     676         236 :   for (MachineBasicBlock &MBB : Fn) {
     677        1720 :     for (MachineInstr &MI : MBB) {
     678         664 :       Modified |= runOnInstruction(&MI);
     679             :     }
     680             :   }
     681             : 
                         // Erasure is deferred until the walk over the function has finished.
                         // NOTE(review): DeadInstr is presumably filled by
                         // eraseInstrWithNoUses() during the walk, and deferring presumably
                         // keeps the block iteration above valid -- confirm against that helper.
     682         143 :   for (MachineInstr *MI : DeadInstr) {
     683           5 :     MI->eraseFromParent();
     684             :   }
     685             : 
     686             :   return Modified;
     687             : }
     688             : 
     689        2321 : FunctionPass *llvm::createA15SDOptimizerPass() {
                         // Factory for this pass: returns a newly heap-allocated A15SDOptimizer
                         // as a raw FunctionPass pointer. NOTE(review): the caller (presumably
                         // the pass manager) is expected to take ownership -- confirm.
     690        4642 :   return new A15SDOptimizer();
     691             : }

Generated by: LCOV version 1.13