LCOV - code coverage report
Current view: top level - lib/Target/ARM - Thumb2SizeReduction.cpp (source / functions)
Test: llvm-toolchain.info        Lines:     435 hit / 453 total =  96.0 %
Date: 2017-09-14 15:23:50        Functions:  20 hit /  20 total = 100.0 %

          Line data    Source code
       1             : //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "ARM.h"
      11             : #include "ARMBaseInstrInfo.h"
      12             : #include "ARMSubtarget.h"
      13             : #include "MCTargetDesc/ARMBaseInfo.h"
      14             : #include "Thumb2InstrInfo.h"
      15             : #include "llvm/ADT/DenseMap.h"
      16             : #include "llvm/ADT/PostOrderIterator.h"
      17             : #include "llvm/ADT/STLExtras.h"
      18             : #include "llvm/ADT/SmallSet.h"
      19             : #include "llvm/ADT/SmallVector.h"
      20             : #include "llvm/ADT/Statistic.h"
      21             : #include "llvm/ADT/StringRef.h"
      22             : #include "llvm/CodeGen/MachineBasicBlock.h"
      23             : #include "llvm/CodeGen/MachineFunction.h"
      24             : #include "llvm/CodeGen/MachineFunctionPass.h"
      25             : #include "llvm/CodeGen/MachineInstr.h"
      26             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      27             : #include "llvm/CodeGen/MachineOperand.h"
      28             : #include "llvm/IR/DebugLoc.h"
      29             : #include "llvm/IR/Function.h"
      30             : #include "llvm/MC/MCInstrDesc.h"
      31             : #include "llvm/MC/MCRegisterInfo.h"
      32             : #include "llvm/Support/CommandLine.h"
      33             : #include "llvm/Support/Compiler.h"
      34             : #include "llvm/Support/Debug.h"
      35             : #include "llvm/Support/ErrorHandling.h"
      36             : #include "llvm/Support/raw_ostream.h"
      37             : #include "llvm/Target/TargetInstrInfo.h"
      38             : #include <algorithm>
      39             : #include <cassert>
      40             : #include <cstdint>
      41             : #include <functional>
      42             : #include <iterator>
      43             : #include <utility>
      44             : 
      45             : using namespace llvm;
      46             : 
      47             : #define DEBUG_TYPE "t2-reduce-size"
      48             : 
      49             : STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
      50             : STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
      51             : STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
      52             : 
      53       72306 : static cl::opt<int> ReduceLimit("t2-reduce-limit",
      54      144612 :                                 cl::init(-1), cl::Hidden);
      55       72306 : static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
      56      144612 :                                      cl::init(-1), cl::Hidden);
      57       72306 : static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
      58      144612 :                                      cl::init(-1), cl::Hidden);
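                      : // The three hidden limits above cap how many reductions of each kind are
                      : // performed (developer/debugging knobs); the default of -1 means no limit.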
      59             : 
      60             : namespace {
      61             : 
       62             :   /// ReduceTable - A static table with information on mapping from wide
       63             :   /// opcodes to narrow ones.
      64             :   struct ReduceEntry {
      65             :     uint16_t WideOpc;      // Wide opcode
      66             :     uint16_t NarrowOpc1;   // Narrow opcode to transform to
      67             :     uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
      68             :     uint8_t  Imm1Limit;    // Limit of immediate field (bits)
      69             :     uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
      70             :     unsigned LowRegs1 : 1; // Only possible if low-registers are used
      71             :     unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
       72             :     unsigned PredCC1  : 2; // 0 - Narrow form sets CPSR iff not predicated.
       73             :                            // 1 - No cc field.
       74             :                            // 2 - Always set CPSR.
      75             :     unsigned PredCC2  : 2;
      76             :     unsigned PartFlag : 1; // 16-bit instruction does partial flag update
      77             :     unsigned Special  : 1; // Needs to be dealt with specially
      78             :     unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
      79             :   };
      80             : 
      81             :   static const ReduceEntry ReduceTable[] = {
      82             :   // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
      83             :   { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
      84             :   { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
      85             :   { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
      86             :   { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
      87             :   { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,   1,   0,  2,0, 0,1,0 },
      88             :   { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
      89             :   { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
      90             :   { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
      91             :   { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
      92             :   //FIXME: Disable CMN, as CCodes are backwards from compare expectations
      93             :   //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
      94             :   { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
      95             :   { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,   1,   0,  2,0, 0,0,0 },
      96             :   { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,   0,   0,  2,0, 0,1,0 },
      97             :   { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
      98             :   // FIXME: adr.n immediate offset must be multiple of 4.
      99             :   //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,   1,   0,  1,0, 0,0,0 },
     100             :   { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
     101             :   { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
     102             :   { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
     103             :   { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
     104             :   { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,0,0 },
     105             :   { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,1,0 },
     106             :   // FIXME: Do we need the 16-bit 'S' variant?
     107             :   { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
     108             :   { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
     109             :   { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,   1,   0,  0,0, 0,0,0 },
     110             :   { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
     111             :   { ARM::t2REV,   ARM::tREV,    0,             0,   0,   1,   0,  1,0, 0,0,0 },
     112             :   { ARM::t2REV16, ARM::tREV16,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
     113             :   { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
     114             :   { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
     115             :   { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
     116             :   { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
     117             :   { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
     118             :   { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
     119             :   { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,   1,   0,  0,0, 0,0,0 },
     120             :   { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
     121             :   { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
     122             :   { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     123             :   { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     124             :   { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
     125             :   { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     126             :   { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     127             : 
     128             :   // FIXME: Clean this up after splitting each Thumb load / store opcode
     129             :   // into multiple ones.
     130             :   { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
     131             :   { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
     132             :   { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     133             :   { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     134             :   { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     135             :   { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     136             :   { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     137             :   { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     138             :   { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
     139             :   { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
     140             :   { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
     141             :   { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     142             :   { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     143             :   { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     144             :   { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     145             :   { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
     146             : 
     147             :   { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,   1,   1,  1,1, 0,1,0 },
     148             :   { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
     149             :   { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,   1,   1,  1,1, 0,1,0 },
     150             :   // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
     151             :   // tSTMIA_UPD is a change in semantics which can only be used if the base
     152             :   // register is killed. This difference is correctly handled elsewhere.
     153             :   { ARM::t2STMIA, ARM::tSTMIA_UPD, 0,          0,   0,   1,   1,  1,1, 0,1,0 },
     154             :   { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,   1,   1,  1,1, 0,1,0 },
     155             :   { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
     156             :   };
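                      :   // Illustrative reading of one entry: the t2ADDri row above,
                      :   //   { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
                      :   // says a wide ADD-immediate narrows to tADDi3 (3-bit immediate) or, as a
                      :   // two-address form, to tADDi8 (8-bit immediate); both require low registers,
                      :   // and Special=1 routes it through ReduceSpecial (which also handles the
                      :   // SP-relative tADDrSPi case).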
     157             : 
     158       24215 :   class Thumb2SizeReduce : public MachineFunctionPass {
     159             :   public:
     160             :     static char ID;
     161             : 
     162             :     const Thumb2InstrInfo *TII;
     163             :     const ARMSubtarget *STI;
     164             : 
     165             :     Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
     166             : 
     167             :     bool runOnMachineFunction(MachineFunction &MF) override;
     168             : 
     169        4885 :     MachineFunctionProperties getRequiredProperties() const override {
     170       14655 :       return MachineFunctionProperties().set(
     171       14655 :           MachineFunctionProperties::Property::NoVRegs);
     172             :     }
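                      :     // Requiring NoVRegs means this pass runs after register allocation, so
                      :     // only physical registers are seen below.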
     173             : 
     174        4883 :     StringRef getPassName() const override {
     175        4883 :       return "Thumb2 instruction size reduction pass";
     176             :     }
     177             : 
     178             :   private:
     179             :     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     180             :     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
     181             : 
     182             :     bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
     183             : 
     184             :     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
     185             :                          bool is2Addr, ARMCC::CondCodes Pred,
     186             :                          bool LiveCPSR, bool &HasCC, bool &CCDead);
     187             : 
     188             :     bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     189             :                          const ReduceEntry &Entry);
     190             : 
     191             :     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     192             :                        const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
     193             : 
     194             :     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     195             :     /// instruction.
     196             :     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     197             :                        const ReduceEntry &Entry, bool LiveCPSR,
     198             :                        bool IsSelfLoop);
     199             : 
     200             :     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     201             :     /// non-two-address instruction.
     202             :     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
     203             :                         const ReduceEntry &Entry, bool LiveCPSR,
     204             :                         bool IsSelfLoop);
     205             : 
     206             :     /// ReduceMI - Attempt to reduce MI, return true on success.
     207             :     bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
     208             :                   bool LiveCPSR, bool IsSelfLoop);
     209             : 
     210             :     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     211             :     bool ReduceMBB(MachineBasicBlock &MBB);
     212             : 
     213             :     bool OptimizeSize;
     214             :     bool MinimizeSize;
     215             : 
     216             :     // Last instruction to define CPSR in the current block.
     217             :     MachineInstr *CPSRDef;
     218             :     // Was CPSR last defined by a high latency instruction?
     219             :     // When CPSRDef is null, this refers to CPSR defs in predecessors.
     220             :     bool HighLatencyCPSR;
     221             : 
     222             :     struct MBBInfo {
     223             :       // The flags leaving this block have high latency.
     224             :       bool HighLatencyCPSR = false;
     225             :       // Has this block been visited yet?
     226             :       bool Visited = false;
     227             : 
     228             :       MBBInfo() = default;
     229             :     };
     230             : 
     231             :     SmallVector<MBBInfo, 8> BlockInfo;
     232             : 
     233             :     std::function<bool(const Function &)> PredicateFtor;
     234             :   };
     235             : 
     236             :   char Thumb2SizeReduce::ID = 0;
     237             : 
     238             : } // end anonymous namespace
     239             : 
     240        4897 : Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
     241       19588 :     : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
     242        4897 :   OptimizeSize = MinimizeSize = false;
     243      303614 :   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
     244      298717 :     unsigned FromOpc = ReduceTable[i].WideOpc;
     245      896151 :     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
     246           0 :       llvm_unreachable("Duplicated entries?");
     247             :   }
     248        4897 : }
     249             : 
     250             : static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
     251         872 :   for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
     252         872 :     if (*Regs == ARM::CPSR)
     253             :       return true;
     254             :   return false;
     255             : }
     256             : 
     257             : // Check for a likely high-latency flag def.
     258             : static bool isHighLatencyCPSR(MachineInstr *Def) {
     259       11488 :   switch(Def->getOpcode()) {
     260             :   case ARM::FMSTAT:
     261             :   case ARM::tMUL:
     262             :     return true;
     263             :   }
     264             :   return false;
     265             : }
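                      : // FMSTAT (transferring the VFP status flags to CPSR) and tMUL typically make
                      : // their flag results available late, which is why they are singled out above.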
     266             : 
      267             : /// canAddPseudoFlagDep - On A9 (and other out-of-order) implementations, the
      268             : /// flag-setting ('s') 16-bit instructions partially update CPSR. Abort the
      269             : /// transformation to avoid adding a false dependency on the last CPSR-setting
      270             : /// instruction, which hurts the out-of-order engine's ability to do register
      271             : /// renaming.
      272             : /// This function checks if there is a read-after-write dependency between the
     273             : /// last instruction that defines the CPSR and the current instruction. If there
     274             : /// is, then there is no harm done since the instruction cannot be retired
     275             : /// before the CPSR setting instruction anyway.
     276             : /// Note, we are not doing full dependency analysis here for the sake of compile
     277             : /// time. We're not looking for cases like:
     278             : /// r0 = muls ...
     279             : /// r1 = add.w r0, ...
     280             : /// ...
     281             : ///    = mul.w r1
      282             : /// In this case it would have been ok to narrow the mul.w to muls since there
      283             : /// is an indirect RAW dependency between the muls and the mul.w.
     284             : bool
     285        1523 : Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
     286             :   // Disable the check for -Oz (aka OptimizeForSizeHarder).
     287        1523 :   if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
     288             :     return false;
     289             : 
     290          88 :   if (!CPSRDef)
     291             :     // If this BB loops back to itself, conservatively avoid narrowing the
     292             :     // first instruction that does partial flag update.
     293          66 :     return HighLatencyCPSR || FirstInSelfLoop;
     294             : 
     295          44 :   SmallSet<unsigned, 2> Defs;
     296         165 :   for (const MachineOperand &MO : CPSRDef->operands()) {
     297         358 :     if (!MO.isReg() || MO.isUndef() || MO.isUse())
     298         176 :       continue;
     299          44 :     unsigned Reg = MO.getReg();
     300          66 :     if (Reg == 0 || Reg == ARM::CPSR)
     301          22 :       continue;
     302          22 :     Defs.insert(Reg);
     303             :   }
     304             : 
     305         120 :   for (const MachineOperand &MO : Use->operands()) {
     306         294 :     if (!MO.isReg() || MO.isUndef() || MO.isDef())
     307          56 :       continue;
     308          46 :     unsigned Reg = MO.getReg();
     309          46 :     if (Defs.count(Reg))
     310           4 :       return false;
     311             :   }
     312             : 
     313             :   // If the current CPSR has high latency, try to avoid the false dependency.
     314          18 :   if (HighLatencyCPSR)
     315             :     return true;
     316             : 
     317             :   // tMOVi8 usually doesn't start long dependency chains, and there are a lot
     318             :   // of them, so always shrink them when CPSR doesn't have high latency.
     319          38 :   if (Use->getOpcode() == ARM::t2MOVi ||
     320           4 :       Use->getOpcode() == ARM::t2MOVi16)
     321             :     return false;
     322             : 
     323             :   // No read-after-write dependency. The narrowing will add false dependency.
     324             :   return true;
     325             : }
     326             : 
     327             : bool
     328        4101 : Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
     329             :                                   bool is2Addr, ARMCC::CondCodes Pred,
     330             :                                   bool LiveCPSR, bool &HasCC, bool &CCDead) {
     331        4101 :   if ((is2Addr  && Entry.PredCC2 == 0) ||
     332        3288 :       (!is2Addr && Entry.PredCC1 == 0)) {
     333        2567 :     if (Pred == ARMCC::AL) {
     334             :       // Not predicated, must set CPSR.
     335        2171 :       if (!HasCC) {
     336             :         // Original instruction was not setting CPSR, but CPSR is not
     337             :         // currently live anyway. It's ok to set it. The CPSR def is
     338             :         // dead though.
     339        1978 :         if (!LiveCPSR) {
     340        1854 :           HasCC = true;
     341        1854 :           CCDead = true;
     342        1854 :           return true;
     343             :         }
     344             :         return false;
     345             :       }
     346             :     } else {
     347             :       // Predicated, must not set CPSR.
     348         396 :       if (HasCC)
     349             :         return false;
     350             :     }
     351        1534 :   } else if ((is2Addr  && Entry.PredCC2 == 2) ||
     352        1158 :              (!is2Addr && Entry.PredCC1 == 2)) {
     353             :     /// Old opcode has an optional def of CPSR.
     354         872 :     if (HasCC)
     355             :       return true;
      356             :     // If the old opcode does not implicitly define CPSR, it's not ok, since
      357             :     // the new opcode's CPSR def is not meant to be thrown away (e.g. CMP).
     358        1744 :     if (!HasImplicitCPSRDef(MI->getDesc()))
     359             :       return false;
     360         872 :     HasCC = true;
     361             :   } else {
     362             :     // 16-bit instruction does not set CPSR.
     363         662 :     if (HasCC)
     364             :       return false;
     365             :   }
     366             : 
     367             :   return true;
     368             : }
     369             : 
     370        8292 : static bool VerifyLowRegs(MachineInstr *MI) {
     371       16584 :   unsigned Opc = MI->getOpcode();
     372        8292 :   bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
     373        8292 :   bool isLROk = (Opc == ARM::t2STMDB_UPD);
     374        8292 :   bool isSPOk = isPCOk || isLROk;
     375       47417 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     376       81398 :     const MachineOperand &MO = MI->getOperand(i);
     377       69400 :     if (!MO.isReg() || MO.isImplicit())
     378       12750 :       continue;
     379       27949 :     unsigned Reg = MO.getReg();
     380       27949 :     if (Reg == 0 || Reg == ARM::CPSR)
     381        7091 :       continue;
     382       20858 :     if (isPCOk && Reg == ARM::PC)
     383         842 :       continue;
     384       20016 :     if (isLROk && Reg == ARM::LR)
     385         850 :       continue;
     386       19166 :     if (Reg == ARM::SP) {
     387        6223 :       if (isSPOk)
     388        4062 :         continue;
     389        2161 :       if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
     390             :         // Special case for these ldr / str with sp as base register.
     391        2110 :         continue;
     392             :     }
     393             :     if (!isARMLowRegister(Reg))
     394             :       return false;
     395             :   }
     396             :   return true;
     397             : }
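                      : // Note: isARMLowRegister() accepts only R0-R7; SP, LR and PC are tolerated
                      : // above only in the opcode-specific special cases.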
     398             : 
     399             : bool
     400        5733 : Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     401             :                                   const ReduceEntry &Entry) {
     402        5733 :   if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
     403             :     return false;
     404             : 
     405        5733 :   unsigned Scale = 1;
     406        5733 :   bool HasImmOffset = false;
     407        5733 :   bool HasShift = false;
     408        5733 :   bool HasOffReg = true;
     409        5733 :   bool isLdStMul = false;
     410        5733 :   unsigned Opc = Entry.NarrowOpc1;
     411        5733 :   unsigned OpNum = 3; // First 'rest' of operands.
     412        5733 :   uint8_t  ImmLimit = Entry.Imm1Limit;
     413             : 
     414        5733 :   switch (Entry.WideOpc) {
     415           0 :   default:
     416           0 :     llvm_unreachable("Unexpected Thumb2 load / store opcode!");
     417        3418 :   case ARM::t2LDRi12:
     418             :   case ARM::t2STRi12:
     419        3418 :     if (MI->getOperand(1).getReg() == ARM::SP) {
     420        2110 :       Opc = Entry.NarrowOpc2;
     421        2110 :       ImmLimit = Entry.Imm2Limit;
     422             :     }
     423             : 
     424             :     Scale = 4;
     425             :     HasImmOffset = true;
     426             :     HasOffReg = false;
     427             :     break;
     428             :   case ARM::t2LDRBi12:
     429             :   case ARM::t2STRBi12:
     430             :     HasImmOffset = true;
     431             :     HasOffReg = false;
     432             :     break;
     433         108 :   case ARM::t2LDRHi12:
     434             :   case ARM::t2STRHi12:
     435         108 :     Scale = 2;
     436         108 :     HasImmOffset = true;
     437         108 :     HasOffReg = false;
     438             :     break;
     439         126 :   case ARM::t2LDRs:
     440             :   case ARM::t2LDRBs:
     441             :   case ARM::t2LDRHs:
     442             :   case ARM::t2LDRSBs:
     443             :   case ARM::t2LDRSHs:
     444             :   case ARM::t2STRs:
     445             :   case ARM::t2STRBs:
     446             :   case ARM::t2STRHs:
     447         126 :     HasShift = true;
     448         126 :     OpNum = 4;
     449             :     break;
     450          87 :   case ARM::t2LDR_POST:
     451             :   case ARM::t2STR_POST: {
     452         174 :     if (!MBB.getParent()->getFunction()->optForMinSize())
     453             :       return false;
     454             : 
     455          16 :     if (!MI->hasOneMemOperand() ||
     456           8 :         (*MI->memoperands_begin())->getAlignment() < 4)
     457             :       return false;
     458             : 
     459             :     // We're creating a completely different type of load/store - LDM from LDR.
     460             :     // For this reason we can't reuse the logic at the end of this function; we
     461             :     // have to implement the MI building here.
     462           6 :     bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
     463          12 :     unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
     464          12 :     unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
     465           6 :     unsigned Offset = MI->getOperand(3).getImm();
     466           6 :     unsigned PredImm = MI->getOperand(4).getImm();
     467           6 :     unsigned PredReg = MI->getOperand(5).getReg();
     468             :     assert(isARMLowRegister(Rt));
     469             :     assert(isARMLowRegister(Rn));
     470             : 
     471           6 :     if (Offset != 4)
     472             :       return false;
     473             : 
     474             :     // Add the 16-bit load / store instruction.
     475           8 :     DebugLoc dl = MI->getDebugLoc();
     476          16 :     auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
     477           4 :                    .addReg(Rn, RegState::Define)
     478           4 :                    .addReg(Rn)
     479           8 :                    .addImm(PredImm)
     480           4 :                    .addReg(PredReg)
     481           4 :                    .addReg(Rt, IsStore ? 0 : RegState::Define);
     482             : 
     483             :     // Transfer memoperands.
     484          12 :     MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     485             : 
     486             :     // Transfer MI flags.
     487           8 :     MIB.setMIFlags(MI->getFlags());
     488             : 
     489             :     // Kill the old instruction.
     490           4 :     MI->eraseFromBundle();
     491           4 :     ++NumLdSts;
     492           4 :     return true;
     493             :   }
     494          75 :   case ARM::t2LDMIA: {
     495          75 :     unsigned BaseReg = MI->getOperand(0).getReg();
     496             :     assert(isARMLowRegister(BaseReg));
     497             : 
     498             :     // For the non-writeback version (this one), the base register must be
     499             :     // one of the registers being loaded.
     500          75 :     bool isOK = false;
     501         292 :     for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
     502         584 :       if (MI->getOperand(i).getReg() == BaseReg) {
     503             :         isOK = true;
     504             :         break;
     505             :       }
     506             :     }
     507             : 
     508          75 :     if (!isOK)
     509             :       return false;
     510             : 
     511             :     OpNum = 0;
     512             :     isLdStMul = true;
     513             :     break;
     514             :   }
     515           5 :   case ARM::t2STMIA:
     516             :     // If the base register is killed, we don't care what its value is after the
     517             :     // instruction, so we can use an updating STMIA.
     518          10 :     if (!MI->getOperand(0).isKill())
     519             :       return false;
     520             : 
     521             :     break;
     522         842 :   case ARM::t2LDMIA_RET: {
     523         842 :     unsigned BaseReg = MI->getOperand(1).getReg();
     524         842 :     if (BaseReg != ARM::SP)
     525             :       return false;
     526         842 :     Opc = Entry.NarrowOpc2; // tPOP_RET
     527         842 :     OpNum = 2;
     528         842 :     isLdStMul = true;
     529             :     break;
     530             :   }
     531         862 :   case ARM::t2LDMIA_UPD:
     532             :   case ARM::t2STMIA_UPD:
     533             :   case ARM::t2STMDB_UPD: {
     534         862 :     OpNum = 0;
     535             : 
     536         862 :     unsigned BaseReg = MI->getOperand(1).getReg();
     537        1720 :     if (BaseReg == ARM::SP &&
     538         858 :         (Entry.WideOpc == ARM::t2LDMIA_UPD ||
     539             :          Entry.WideOpc == ARM::t2STMDB_UPD)) {
     540         858 :       Opc = Entry.NarrowOpc2; // tPOP or tPUSH
     541         858 :       OpNum = 2;
     542           8 :     } else if (!isARMLowRegister(BaseReg) ||
     543           4 :                (Entry.WideOpc != ARM::t2LDMIA_UPD &&
     544             :                 Entry.WideOpc != ARM::t2STMIA_UPD)) {
     545             :       return false;
     546             :     }
     547             : 
     548             :     isLdStMul = true;
     549             :     break;
     550             :   }
     551             :   }
     552             : 
     553        5646 :   unsigned OffsetReg = 0;
     554        5646 :   bool OffsetKill = false;
     555        5646 :   bool OffsetInternal = false;
     556        5646 :   if (HasShift) {
     557         126 :     OffsetReg  = MI->getOperand(2).getReg();
     558         252 :     OffsetKill = MI->getOperand(2).isKill();
     559         252 :     OffsetInternal = MI->getOperand(2).isInternalRead();
     560             : 
     561         126 :     if (MI->getOperand(3).getImm())
     562             :       // Thumb1 addressing mode doesn't support shift.
     563             :       return false;
     564             :   }
     565             : 
     566        5600 :   unsigned OffsetImm = 0;
     567        5600 :   if (HasImmOffset) {
     568        3736 :     OffsetImm = MI->getOperand(2).getImm();
     569        3736 :     unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
     570             : 
     571        3736 :     if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
     572             :       // Make sure the immediate field fits.
     573             :       return false;
     574             :   }
     575             : 
     576             :   // Add the 16-bit load / store instruction.
     577       14475 :   DebugLoc dl = MI->getDebugLoc();
     578       14475 :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
     579             : 
     580             :   // tSTMIA_UPD takes a defining register operand. We've already checked that
     581             :   // the register is killed, so mark it as dead here.
     582        4825 :   if (Entry.WideOpc == ARM::t2STMIA)
     583           5 :     MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
     584             : 
     585        4825 :   if (!isLdStMul) {
     586        6092 :     MIB.add(MI->getOperand(0));
     587        9138 :     MIB.add(MI->getOperand(1));
     588             : 
     589        3046 :     if (HasImmOffset)
     590        2961 :       MIB.addImm(OffsetImm / Scale);
     591             : 
     592             :     assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
     593             : 
     594        3046 :     if (HasOffReg)
     595         170 :       MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
     596          85 :                             getInternalReadRegState(OffsetInternal));
     597             :   }
     598             : 
     599             :   // Transfer the rest of operands.
     600       20180 :   for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
     601       46065 :     MIB.add(MI->getOperand(OpNum));
     602             : 
     603             :   // Transfer memoperands.
     604       14475 :   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     605             : 
     606             :   // Transfer MI flags.
     607        9650 :   MIB.setMIFlags(MI->getFlags());
     608             : 
     609             :   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
     610             : 
     611        4825 :   MBB.erase_instr(MI);
     612        4825 :   ++NumLdSts;
     613        4825 :   return true;
     614             : }
     615             : 
     616             : bool
     617       10835 : Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     618             :                                 const ReduceEntry &Entry,
     619             :                                 bool LiveCPSR, bool IsSelfLoop) {
     620       21670 :   unsigned Opc = MI->getOpcode();
     621       10835 :   if (Opc == ARM::t2ADDri) {
     622             :     // If the source register is SP, try to reduce to tADDrSPi, otherwise
     623             :     // it's a normal reduce.
     624        2357 :     if (MI->getOperand(1).getReg() != ARM::SP) {
     625         729 :       if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     626             :         return true;
     627         584 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     628             :     }
     629             :     // Try to reduce to tADDrSPi.
     630        1628 :     unsigned Imm = MI->getOperand(2).getImm();
      631             :     // The immediate must be in range, the destination register must be a low
      632             :     // reg, the predicate must be "always", and the condition flags must not
      633             :     // be set by the instruction.
     634        1628 :     if (Imm & 3 || Imm > 1020)
     635             :       return false;
     636        1454 :     if (!isARMLowRegister(MI->getOperand(0).getReg()))
     637             :       return false;
     638         630 :     if (MI->getOperand(3).getImm() != ARMCC::AL)
     639             :       return false;
     640         630 :     const MCInstrDesc &MCID = MI->getDesc();
     641        1260 :     if (MCID.hasOptionalDef() &&
     642        1260 :         MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
     643             :       return false;
     644             : 
     645             :     MachineInstrBuilder MIB =
     646         630 :         BuildMI(MBB, MI, MI->getDebugLoc(),
     647        1890 :                 TII->get(ARM::tADDrSPi))
     648        1260 :             .add(MI->getOperand(0))
     649        1890 :             .add(MI->getOperand(1))
     650        1260 :             .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
     651        1890 :             .add(predOps(ARMCC::AL));
     652             : 
     653             :     // Transfer MI flags.
     654        1260 :     MIB.setMIFlags(MI->getFlags());
     655             : 
     656             :     DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " <<*MIB);
     657             : 
     658         630 :     MBB.erase_instr(MI);
     659         630 :     ++NumNarrows;
     660         630 :     return true;
     661             :   }
     662             : 
     663        8478 :   if (Entry.LowRegs1 && !VerifyLowRegs(MI))
     664             :     return false;
     665             : 
     666        6904 :   if (MI->mayLoadOrStore())
     667        5733 :     return ReduceLoadStore(MBB, MI, Entry);
     668             : 
     669        1171 :   switch (Opc) {
     670             :   default: break;
     671           0 :   case ARM::t2ADDSri:
     672             :   case ARM::t2ADDSrr: {
     673           0 :     unsigned PredReg = 0;
     674           0 :     if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
     675           0 :       switch (Opc) {
     676             :       default: break;
     677           0 :       case ARM::t2ADDSri:
     678           0 :         if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     679           0 :           return true;
     680             :         LLVM_FALLTHROUGH;
     681             :       case ARM::t2ADDSrr:
     682           0 :         return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     683             :       }
     684             :     }
     685           0 :     break;
     686             :   }
     687         214 :   case ARM::t2RSBri:
     688             :   case ARM::t2RSBSri:
     689             :   case ARM::t2SXTB:
     690             :   case ARM::t2SXTH:
     691             :   case ARM::t2UXTB:
     692             :   case ARM::t2UXTH:
     693         214 :     if (MI->getOperand(2).getImm() == 0)
     694         206 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     695             :     break;
     696         771 :   case ARM::t2MOVi16:
     697             :     // Can convert only 'pure' immediate operands, not immediates obtained as
     698             :     // globals' addresses.
     699        1542 :     if (MI->getOperand(1).isImm())
     700         486 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     701             :     break;
     702         186 :   case ARM::t2CMPrr: {
     703             :     // Try to reduce to the lo-reg only version first. Why there are two
     704             :     // versions of the instruction is a mystery.
     705             :     // It would be nice to just have two entries in the master table that
     706             :     // are prioritized, but the table assumes a unique entry for each
     707             :     // source insn opcode. So for now, we hack a local entry record to use.
     708             :     static const ReduceEntry NarrowEntry =
     709             :       { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
     710         186 :     if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
     711             :       return true;
     712          19 :     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     713             :   }
     714             :   }
     715             :   return false;
     716             : }
     717             : 
     718             : bool
     719        1927 : Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     720             :                                 const ReduceEntry &Entry,
     721             :                                 bool LiveCPSR, bool IsSelfLoop) {
     722        1939 :   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     723             :     return false;
     724             : 
     725        1921 :   if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
     726             :     // Don't issue movs with shifter operand for some CPUs unless we
     727             :     // are optimizing for size.
     728             :     return false;
     729             : 
     730        1918 :   unsigned Reg0 = MI->getOperand(0).getReg();
     731        1918 :   unsigned Reg1 = MI->getOperand(1).getReg();
     732             :   // t2MUL is "special". The tied source operand is second, not first.
     733        3836 :   if (MI->getOpcode() == ARM::t2MUL) {
     734          71 :     unsigned Reg2 = MI->getOperand(2).getReg();
     735             :     // Early exit if the regs aren't all low regs.
     736         142 :     if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
     737          65 :         || !isARMLowRegister(Reg2))
     738             :       return false;
     739          65 :     if (Reg0 != Reg2) {
     740             :       // If the other operand also isn't the same as the destination, we
     741             :       // can't reduce.
     742          40 :       if (Reg1 != Reg0)
     743             :         return false;
     744             :       // Try to commute the operands to make it a 2-address instruction.
     745          38 :       MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
     746          38 :       if (!CommutedMI)
     747             :         return false;
     748             :     }
     749        1847 :   } else if (Reg0 != Reg1) {
     750             :     // Try to commute the operands to make it a 2-address instruction.
     751        1099 :     unsigned CommOpIdx1 = 1;
     752        1099 :     unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
     753        1491 :     if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
     754         784 :         MI->getOperand(CommOpIdx2).getReg() != Reg0)
     755         896 :       return false;
     756             :     MachineInstr *CommutedMI =
     757         203 :         TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
     758         203 :     if (!CommutedMI)
     759             :       return false;
     760             :   }
     761        1014 :   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
     762             :     return false;
     763         881 :   if (Entry.Imm2Limit) {
     764         299 :     unsigned Imm = MI->getOperand(2).getImm();
     765         299 :     unsigned Limit = (1 << Entry.Imm2Limit) - 1;
     766         299 :     if (Imm > Limit)
     767             :       return false;
     768             :   } else {
     769         582 :     unsigned Reg2 = MI->getOperand(2).getReg();
     770         582 :     if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
     771             :       return false;
     772             :   }
     773             : 
     774             :   // Check if it's possible / necessary to transfer the predicate.
     775        1626 :   const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
     776         813 :   unsigned PredReg = 0;
     777         813 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
     778         813 :   bool SkipPred = false;
     779         813 :   if (Pred != ARMCC::AL) {
     780          31 :     if (!NewMCID.isPredicable())
     781             :       // Can't transfer predicate, fail.
     782             :       return false;
     783             :   } else {
     784         782 :     SkipPred = !NewMCID.isPredicable();
     785             :   }
     786             : 
     787         813 :   bool HasCC = false;
     788         813 :   bool CCDead = false;
     789         813 :   const MCInstrDesc &MCID = MI->getDesc();
     790         813 :   if (MCID.hasOptionalDef()) {
     791         750 :     unsigned NumOps = MCID.getNumOperands();
     792        1500 :     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     793         870 :     if (HasCC && MI->getOperand(NumOps-1).isDead())
     794           0 :       CCDead = true;
     795             :   }
     796         813 :   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
     797             :     return false;
     798             : 
      799             :   // Avoid adding a false dependency on a partial flag update by some 16-bit
      800             :   // instructions which have the 's' bit set.
     801        1080 :   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
     802         155 :       canAddPseudoFlagDep(MI, IsSelfLoop))
     803             :     return false;
     804             : 
     805             :   // Add the 16-bit instruction.
     806        1524 :   DebugLoc dl = MI->getDebugLoc();
     807         762 :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
     808        1524 :   MIB.add(MI->getOperand(0));
     809         762 :   if (NewMCID.hasOptionalDef())
     810         832 :     MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
     811             : 
     812             :   // Transfer the rest of operands.
     813         762 :   unsigned NumOps = MCID.getNumOperands();
     814        4602 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
     815        4544 :     if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
     816         704 :       continue;
     817        3136 :     if (SkipPred && MCID.OpInfo[i].isPredicate())
     818           0 :       continue;
     819        6272 :     MIB.add(MI->getOperand(i));
     820             :   }
     821             : 
     822             :   // Transfer MI flags.
     823        1524 :   MIB.setMIFlags(MI->getFlags());
     824             : 
     825             :   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
     826             : 
     827         762 :   MBB.erase_instr(MI);
     828         762 :   ++Num2Addrs;
     829         762 :   return true;
     830             : }
     831             : 
     832             : bool
     833        4839 : Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
     834             :                                  const ReduceEntry &Entry,
     835             :                                  bool LiveCPSR, bool IsSelfLoop) {
     836        4849 :   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     837             :     return false;
     838             : 
     839        4834 :   if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
     840             :     // Don't issue movs with shifter operand for some CPUs unless we
     841             :     // are optimizing for size.
     842             :     return false;
     843             : 
     844        4825 :   unsigned Limit = ~0U;
     845        4825 :   if (Entry.Imm1Limit)
     846        4066 :     Limit = (1 << Entry.Imm1Limit) - 1;
     847             : 
     848        4825 :   const MCInstrDesc &MCID = MI->getDesc();
     849       27270 :   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
     850       19157 :     if (MCID.OpInfo[i].isPredicate())
     851        6576 :       continue;
     852       25162 :     const MachineOperand &MO = MI->getOperand(i);
     853       12581 :     if (MO.isReg()) {
     854        8654 :       unsigned Reg = MO.getReg();
     855        8654 :       if (!Reg || Reg == ARM::CPSR)
     856        2100 :         continue;
     857        6554 :       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
     858             :         return false;
     859        3927 :     } else if (MO.isImm() &&
     860        3927 :                !MCID.OpInfo[i].isPredicate()) {
     861        3927 :       if (((unsigned)MO.getImm()) > Limit)
     862             :         return false;
     863             :     }
     864             :   }
     865             : 
     866             :   // Check if it's possible / necessary to transfer the predicate.
     867        6576 :   const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
     868        3288 :   unsigned PredReg = 0;
     869        3288 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
     870        3288 :   bool SkipPred = false;
     871        3288 :   if (Pred != ARMCC::AL) {
     872         492 :     if (!NewMCID.isPredicable())
     873             :       // Can't transfer predicate, fail.
     874             :       return false;
     875             :   } else {
     876        2796 :     SkipPred = !NewMCID.isPredicable();
     877             :   }
     878             : 
     879        3288 :   bool HasCC = false;
     880        3288 :   bool CCDead = false;
     881        3288 :   if (MCID.hasOptionalDef()) {
     882        2100 :     unsigned NumOps = MCID.getNumOperands();
     883        4200 :     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     884        2203 :     if (HasCC && MI->getOperand(NumOps-1).isDead())
     885           0 :       CCDead = true;
     886             :   }
     887        3288 :   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
     888             :     return false;
     889             : 
      890             :   // Avoid adding a false dependency on a partial flag update by some 16-bit
      891             :   // instructions which have the 's' bit set.
     892        6272 :   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
     893        1368 :       canAddPseudoFlagDep(MI, IsSelfLoop))
     894             :     return false;
     895             : 
     896             :   // Add the 16-bit instruction.
     897        6332 :   DebugLoc dl = MI->getDebugLoc();
     898        3166 :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
     899        6332 :   MIB.add(MI->getOperand(0));
     900        3166 :   if (NewMCID.hasOptionalDef())
     901        4016 :     MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
     902             : 
     903             :   // Transfer the rest of operands.
     904        3166 :   unsigned NumOps = MCID.getNumOperands();
     905       17018 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
     906       15839 :     if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
     907        1987 :       continue;
     908       12071 :     if ((MCID.getOpcode() == ARM::t2RSBSri ||
     909       11775 :          MCID.getOpcode() == ARM::t2RSBri ||
     910       11635 :          MCID.getOpcode() == ARM::t2SXTB ||
     911       11467 :          MCID.getOpcode() == ARM::t2SXTH ||
     912       11295 :          MCID.getOpcode() == ARM::t2UXTB ||
     913       12695 :          MCID.getOpcode() == ARM::t2UXTH) && i == 2)
     914             :       // Skip the zero immediate operand, it's now implicit.
     915         206 :       continue;
     916       21592 :     bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
     917       11659 :     if (SkipPred && isPred)
     918           0 :         continue;
     919       23318 :     const MachineOperand &MO = MI->getOperand(i);
     920       18517 :     if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
     921             :       // Skip implicit def of CPSR. Either it's modeled as an optional
     922             :       // def now or it's already an implicit def on the new instruction.
     923         872 :       continue;
     924             :     MIB.add(MO);
     925             :   }
     926        3166 :   if (!MCID.isPredicable() && NewMCID.isPredicable())
     927           0 :     MIB.add(predOps(ARMCC::AL));
     928             : 
     929             :   // Transfer MI flags.
     930        6332 :   MIB.setMIFlags(MI->getFlags());
     931             : 
     932             :   DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
     933             : 
     934        3166 :   MBB.erase_instr(MI);
     935        3166 :   ++NumNarrows;
     936        3166 :   return true;
     937             : }
     938             : 
     939      133041 : static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
     940      133041 :   bool HasDef = false;
     941      749096 :   for (const MachineOperand &MO : MI.operands()) {
     942     1437931 :     if (!MO.isReg() || MO.isUndef() || MO.isUse())
     943      488635 :       continue;
     944      127420 :     if (MO.getReg() != ARM::CPSR)
     945      121629 :       continue;
     946             : 
     947        5791 :     DefCPSR = true;
     948        5791 :     if (!MO.isDead())
     949        3687 :       HasDef = true;
     950             :   }
     951             : 
     952      133041 :   return HasDef || LiveCPSR;
     953             : }
     954             : 
     955      133041 : static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
     956      745366 :   for (const MachineOperand &MO : MI.operands()) {
     957     1436573 :     if (!MO.isReg() || MO.isUndef() || MO.isDef())
     958      332002 :       continue;
     959      283785 :     if (MO.getReg() != ARM::CPSR)
     960      279709 :       continue;
     961             :     assert(LiveCPSR && "CPSR liveness tracking is wrong!");
     962        4076 :     if (MO.isKill()) {
     963             :       LiveCPSR = false;
     964             :       break;
     965             :     }
     966             :   }
     967             : 
     968      133041 :   return LiveCPSR;
     969             : }
     970             : 
     971      133041 : bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
     972             :                                 bool LiveCPSR, bool IsSelfLoop) {
     973      266082 :   unsigned Opcode = MI->getOpcode();
     974      133041 :   DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
     975      399123 :   if (OPI == ReduceOpcodeMap.end())
     976             :     return false;
     977       15015 :   const ReduceEntry &Entry = ReduceTable[OPI->second];
     978             : 
     979             :   // Don't attempt normal reductions on "special" cases for now.
     980       15015 :   if (Entry.Special)
     981       10835 :     return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     982             : 
     983             :   // Try to transform to a 16-bit two-address instruction.
     984        5378 :   if (Entry.NarrowOpc2 &&
     985        1198 :       ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     986             :     return true;
     987             : 
     988             :   // Try to transform to a 16-bit non-two-address instruction.
     989        6921 :   if (Entry.NarrowOpc1 &&
     990        3358 :       ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     991             :     return true;
     992             : 
     993             :   return false;
     994             : }
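
ReduceMI itself is table-driven: ReduceOpcodeMap takes the wide Thumb2 opcode to an index into ReduceTable, and the ReduceEntry found there names the candidate 16-bit opcodes and whether the instruction needs special handling. The sketch below shows only the shape of that mapping, with simplified field names and two example rows; the real ReduceEntry and ReduceTable are defined earlier in this file and carry additional constraints (immediate-width limits, low-register requirements, flag-setting behavior).

    // Illustrative sketch only -- field names and rows are simplified, not the
    // definitions used earlier in this file.
    struct EntrySketch {
      uint16_t WideOpc;    // 32-bit Thumb2 opcode to reduce
      uint16_t NarrowOpc1; // 16-bit non-two-address candidate (0 if none)
      uint16_t NarrowOpc2; // 16-bit two-address candidate (0 if none)
      bool Special;        // true => routed through ReduceSpecial
    };

    static const EntrySketch SketchTable[] = {
      { ARM::t2ADDri,  ARM::tADDi3, ARM::tADDi8,  false },
      { ARM::t2LDRi12, ARM::tLDRi,  ARM::tLDRspi, true  },
    };
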
     995             : 
     996       15677 : bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
     997       15677 :   bool Modified = false;
     998             : 
     999             :   // Yes, CPSR could be livein.
    1000       15677 :   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
    1001       15677 :   MachineInstr *BundleMI = nullptr;
    1002             : 
    1003       15677 :   CPSRDef = nullptr;
    1004       15677 :   HighLatencyCPSR = false;
    1005             : 
    1006             :   // Check predecessors for the latest CPSRDef.
    1007       36902 :   for (auto *Pred : MBB.predecessors()) {
    1008       11288 :     const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    1009        5644 :     if (!PInfo.Visited) {
    1010             :       // Since blocks are visited in RPO, this must be a back-edge.
    1011        1005 :       continue;
    1012             :     }
    1013        4639 :     if (PInfo.HighLatencyCPSR) {
    1014          96 :       HighLatencyCPSR = true;
    1015          96 :       break;
    1016             :     }
    1017             :   }
    1018             : 
    1019             :   // If this BB loops back to itself, conservatively avoid narrowing the
    1020             :   // first instruction that does partial flag update.
    1021       15677 :   bool IsSelfLoop = MBB.isSuccessor(&MBB);
    1022       31354 :   MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
    1023       15677 :   MachineBasicBlock::instr_iterator NextMII;
    1024      149581 :   for (; MII != E; MII = NextMII) {
    1025      133904 :     NextMII = std::next(MII);
    1026             : 
    1027      133904 :     MachineInstr *MI = &*MII;
    1028      134633 :     if (MI->isBundle()) {
    1029         729 :       BundleMI = MI;
    1030        1592 :       continue;
    1031             :     }
    1032      133175 :     if (MI->isDebugValue())
    1033         134 :       continue;
    1034             : 
    1035      133041 :     LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
    1036             : 
    1037             :     // Does NextMII belong to the same bundle as MI?
    1038      367955 :     bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
    1039             : 
    1040      133041 :     if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
    1041             :       Modified = true;
    1042        9387 :       MachineBasicBlock::instr_iterator I = std::prev(NextMII);
    1043        9387 :       MI = &*I;
    1044             :       // Removing and reinserting the first instruction in a bundle will break
    1045             :       // up the bundle. Fix the bundling if it was broken.
    1046        9587 :       if (NextInSameBundle && !NextMII->isBundledWithPred())
    1047           0 :         NextMII->bundleWithPred();
    1048             :     }
    1049             : 
    1050      135517 :     if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
    1051             :       // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
    1052             :       // marker is only on the BUNDLE instruction. Process the BUNDLE
    1053             :       // instruction as we finish with the bundled instruction to work around
    1054             :       // the inconsistency.
    1055         729 :       if (BundleMI->killsRegister(ARM::CPSR))
    1056         603 :         LiveCPSR = false;
    1057          37 :       MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
    1058          74 :       if (MO && !MO->isDead())
    1059             :         LiveCPSR = true;
    1060         684 :       MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
    1061        1368 :       if (MO && !MO->isKill())
    1062             :         LiveCPSR = true;
    1063             :     }
    1064             : 
    1065      133041 :     bool DefCPSR = false;
    1066      133041 :     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    1067      133041 :     if (MI->isCall()) {
    1068             :       // Calls don't really set CPSR.
    1069        6685 :       CPSRDef = nullptr;
    1070        6685 :       HighLatencyCPSR = false;
    1071        6685 :       IsSelfLoop = false;
    1072      126356 :     } else if (DefCPSR) {
    1073             :       // This is the last CPSR defining instruction.
    1074        5744 :       CPSRDef = MI;
    1075       11488 :       HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
    1076        5744 :       IsSelfLoop = false;
    1077             :     }
    1078             :   }
    1079             : 
    1080       31354 :   MBBInfo &Info = BlockInfo[MBB.getNumber()];
    1081       15677 :   Info.HighLatencyCPSR = HighLatencyCPSR;
    1082       15677 :   Info.Visited = true;
    1083       15677 :   return Modified;
    1084             : }
    1085             : 
    1086       22823 : bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
    1087       56495 :   if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
    1088             :     return false;
    1089             : 
    1090       12626 :   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
    1091       24342 :   if (STI->isThumb1Only() || STI->prefers32BitThumb())
    1092             :     return false;
    1093             : 
    1094       11715 :   TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
    1095             : 
    1096             :   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
    1097       11715 :   OptimizeSize = MF.getFunction()->optForSize();
    1098       23430 :   MinimizeSize = MF.getFunction()->optForMinSize();
    1099             : 
    1100       23430 :   BlockInfo.clear();
    1101       23430 :   BlockInfo.resize(MF.getNumBlockIDs());
    1102             : 
     1103             :   // Visit blocks in reverse post-order so CPSRDef is known for all
    1104             :   // predecessors.
    1105       11715 :   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
    1106       11715 :   bool Modified = false;
    1107             :   for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
    1108       39107 :        I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    1109       15677 :     Modified |= ReduceMBB(**I);
    1110       11715 :   return Modified;
    1111             : }
    1112             : 
    1113             : /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
    1114             : /// reduction pass.
    1115        4897 : FunctionPass *llvm::createThumb2SizeReductionPass(
    1116             :     std::function<bool(const Function &)> Ftor) {
    1117       14691 :   return new Thumb2SizeReduce(std::move(Ftor));
    1118      216918 : }
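
For context on how the factory above is typically consumed: a TargetPassConfig subclass adds the pass to the codegen pipeline, optionally supplying a predicate so the reduction only runs on selected functions. The fragment below is a minimal sketch under that assumption; MyARMPassConfig and the optsize-only predicate are illustrative, not the in-tree ARM pipeline.

    // Sketch: registering the pass from a hypothetical TargetPassConfig
    // subclass. The predicate limits the reduction to functions marked for
    // size optimization; any bool(const Function &) callable works.
    void MyARMPassConfig::addPreEmitPass() {
      addPass(createThumb2SizeReductionPass(
          [](const Function &F) { return F.optForSize(); }));
    }

Passing no functor (the default) leaves PredicateFtor empty, so the guard at the top of runOnMachineFunction is skipped and every Thumb2 function is considered for reduction.
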

Generated by: LCOV version 1.13