LCOV - code coverage report
Current view: top level - lib/Target/ARM - Thumb2SizeReduction.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 366 384 95.3 %
Date: 2018-07-13 00:08:38 Functions: 22 22 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "ARM.h"
      11             : #include "ARMBaseInstrInfo.h"
      12             : #include "ARMSubtarget.h"
      13             : #include "MCTargetDesc/ARMBaseInfo.h"
      14             : #include "Thumb2InstrInfo.h"
      15             : #include "llvm/ADT/DenseMap.h"
      16             : #include "llvm/ADT/PostOrderIterator.h"
      17             : #include "llvm/ADT/STLExtras.h"
      18             : #include "llvm/ADT/SmallSet.h"
      19             : #include "llvm/ADT/SmallVector.h"
      20             : #include "llvm/ADT/Statistic.h"
      21             : #include "llvm/ADT/StringRef.h"
      22             : #include "llvm/CodeGen/MachineBasicBlock.h"
      23             : #include "llvm/CodeGen/MachineFunction.h"
      24             : #include "llvm/CodeGen/MachineFunctionPass.h"
      25             : #include "llvm/CodeGen/MachineInstr.h"
      26             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      27             : #include "llvm/CodeGen/MachineOperand.h"
      28             : #include "llvm/CodeGen/TargetInstrInfo.h"
      29             : #include "llvm/IR/DebugLoc.h"
      30             : #include "llvm/IR/Function.h"
      31             : #include "llvm/MC/MCInstrDesc.h"
      32             : #include "llvm/MC/MCRegisterInfo.h"
      33             : #include "llvm/Support/CommandLine.h"
      34             : #include "llvm/Support/Compiler.h"
      35             : #include "llvm/Support/Debug.h"
      36             : #include "llvm/Support/ErrorHandling.h"
      37             : #include "llvm/Support/raw_ostream.h"
      38             : #include <algorithm>
      39             : #include <cassert>
      40             : #include <cstdint>
      41             : #include <functional>
      42             : #include <iterator>
      43             : #include <utility>
      44             : 
      45             : using namespace llvm;
      46             : 
      47             : #define DEBUG_TYPE "t2-reduce-size"
      48             : #define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
      49             : 
      50             : STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
      51             : STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
      52             : STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
      53             : 
      54       99743 : static cl::opt<int> ReduceLimit("t2-reduce-limit", // NOTE(review): not read in the code visible here; presumably a cap like ReduceLimitLdSt -- confirm
      55       99743 :                                 cl::init(-1), cl::Hidden);
      56       99743 : static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", // NOTE(review): not read in the code visible here; presumably the two-address cap -- confirm
      57       99743 :                                      cl::init(-1), cl::Hidden);
      58       99743 : static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", // Cap on load/store reductions: ReduceLoadStore bails once NumLdSts reaches it
      59       99743 :                                      cl::init(-1), cl::Hidden); // -1 (the default) means no cap
      60             : 
      61             : namespace {
      62             : 
      63             :   /// ReduceEntry - One row of the static ReduceTable, which maps a wide
      64             :   /// opcode to its narrow replacement(s) and the constraints for using them.
      65             :   struct ReduceEntry {
      66             :     uint16_t WideOpc;      // Wide opcode
      67             :     uint16_t NarrowOpc1;   // Narrow opcode to transform to
      68             :     uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
      69             :     uint8_t  Imm1Limit;    // Limit of immediate field (bits)
      70             :     uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
      71             :     unsigned LowRegs1 : 1; // Only possible if low-registers are used
      72             :     unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
      73             :     unsigned PredCC1  : 2; // 0 - Sets CPSR iff not predicated (see VerifyPredAndCC).
      74             :                            // 1 - No cc field.
      75             :                            // 2 - Always set CPSR.
      76             :     unsigned PredCC2  : 2; // Same encoding as PredCC1, for the two-address form
      77             :     unsigned PartFlag : 1; // 16-bit instruction does partial flag update
      78             :     unsigned Special  : 1; // Needs to be dealt with specially
      79             :     unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
      80             :   };
      81             : 
      82             :   static const ReduceEntry ReduceTable[] = { // Keyed by WideOpc; the pass constructor rejects duplicate wide opcodes
      83             :   // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM (P/C = PredCC1,PredCC2)
      84             :   { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
      85             :   { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
      86             :   { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
      87             :   { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
      88             :   { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,   1,   0,  2,0, 0,1,0 },
      89             :   { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
      90             :   { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
      91             :   { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
      92             :   { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
      93             :   //FIXME: Disable CMN, as CCodes are backwards from compare expectations
      94             :   //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
      95             :   { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
      96             :   { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,   1,   0,  2,0, 0,0,0 },
      97             :   { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,   0,   0,  2,0, 0,1,0 },
      98             :   { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
      99             :   // FIXME: adr.n immediate offset must be multiple of 4.
     100             :   //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,   1,   0,  1,0, 0,0,0 },
     101             :   { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
     102             :   { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
     103             :   { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
     104             :   { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
     105             :   { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,0,0 },
     106             :   { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,1,0 },
     107             :   // FIXME: Do we need the 16-bit 'S' variant?
     108             :   { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
     109             :   { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
     110             :   { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,   1,   0,  0,0, 0,0,0 },
     111             :   { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
     112             :   { ARM::t2REV,   ARM::tREV,    0,             0,   0,   1,   0,  1,0, 0,0,0 },
     113             :   { ARM::t2REV16, ARM::tREV16,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
     114             :   { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
     115             :   { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
     116             :   { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
     117             :   { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
     118             :   { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
     119             :   { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
     120             :   { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,   1,   0,  0,0, 0,0,0 },
     121             :   { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
     122             :   { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
     123             :   { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     124             :   { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     125             :   { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
     126             :   { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     127             :   { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
     128             : 
     129             :   // FIXME: Clean this up after splitting each Thumb load / store opcode
     130             :   // into multiple ones.
     131             :   { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
     132             :   { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
     133             :   { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     134             :   { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     135             :   { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     136             :   { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     137             :   { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     138             :   { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     139             :   { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
     140             :   { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
     141             :   { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
     142             :   { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     143             :   { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     144             :   { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
     145             :   { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
     146             :   { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,   1,   0,  0,0, 0,1,0 },
     147             : 
     148             :   { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,   1,   1,  1,1, 0,1,0 },
     149             :   { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
     150             :   { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,   1,   1,  1,1, 0,1,0 },
     151             :   // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
     152             :   // tSTMIA_UPD is a change in semantics which can only be used if the base
     153             :   // register is killed. This difference is correctly handled elsewhere.
     154             :   { ARM::t2STMIA, ARM::tSTMIA_UPD, 0,          0,   0,   1,   1,  1,1, 0,1,0 },
     155             :   { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,   1,   1,  1,1, 0,1,0 },
     156             :   { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
     157             :   };
     158             : 
     159       15483 :   class Thumb2SizeReduce : public MachineFunctionPass { // Pass that narrows wide Thumb2 instructions as described by ReduceTable
     160             :   public:
     161             :     static char ID; // Pass identification; handed to the MachineFunctionPass base constructor
     162             : 
     163             :     const Thumb2InstrInfo *TII; // Used to fetch narrow opcode descriptors; NOTE(review): not set by the constructor, presumably assigned per function -- confirm
     164             :     const ARMSubtarget *STI;    // Queried for avoidCPSRPartialUpdate(); NOTE(review): not set by the constructor -- confirm
     165             : 
     166             :     Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr); // Ftor is stored in PredicateFtor
     167             : 
     168             :     bool runOnMachineFunction(MachineFunction &MF) override;
     169             : 
     170        5191 :     MachineFunctionProperties getRequiredProperties() const override { // This pass requires the NoVRegs property
     171       10382 :       return MachineFunctionProperties().set(
     172        5191 :           MachineFunctionProperties::Property::NoVRegs);
     173             :     }
     174             : 
     175        5189 :     StringRef getPassName() const override { // Human-readable pass name
     176        5189 :       return THUMB2_SIZE_REDUCE_NAME;
     177             :     }
     178             : 
     179             :   private:
     180             :     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
     181             :     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
     182             : 
     183             :     bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop); // True if narrowing Use would add a false CPSR dependency
     184             : 
     185             :     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, // Checks predicate / CPSR-def compatibility with the narrow form;
     186             :                          bool is2Addr, ARMCC::CondCodes Pred,        // may update HasCC and CCDead
     187             :                          bool LiveCPSR, bool &HasCC, bool &CCDead);
     188             : 
     189             :     bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Handles the load / store entries of ReduceTable
     190             :                          const ReduceEntry &Entry);
     191             : 
     192             :     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     193             :                        const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
     194             : 
     195             :     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     196             :     /// instruction.
     197             :     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     198             :                        const ReduceEntry &Entry, bool LiveCPSR,
     199             :                        bool IsSelfLoop);
     200             : 
     201             :     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     202             :     /// non-two-address instruction.
     203             :     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
     204             :                         const ReduceEntry &Entry, bool LiveCPSR,
     205             :                         bool IsSelfLoop);
     206             : 
     207             :     /// ReduceMI - Attempt to reduce MI, return true on success.
     208             :     bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
     209             :                   bool LiveCPSR, bool IsSelfLoop);
     210             : 
     211             :     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     212             :     bool ReduceMBB(MachineBasicBlock &MBB);
     213             : 
     214             :     bool OptimizeSize; // Starts false (constructor); NOTE(review): presumably refreshed per function -- confirm
     215             :     bool MinimizeSize; // Starts false (constructor); when true, canAddPseudoFlagDep never blocks narrowing
     216             : 
     217             :     // Last instruction to define CPSR in the current block.
     218             :     MachineInstr *CPSRDef;
     219             :     // Was CPSR last defined by a high latency instruction?
     220             :     // When CPSRDef is null, this refers to CPSR defs in predecessors.
     221             :     bool HighLatencyCPSR;
     222             : 
     223             :     struct MBBInfo { // Per-block bookkeeping stored in BlockInfo
     224             :       // The flags leaving this block have high latency.
     225             :       bool HighLatencyCPSR = false;
     226             :       // Has this block been visited yet?
     227             :       bool Visited = false;
     228             : 
     229             :       MBBInfo() = default;
     230             :     };
     231             : 
     232             :     SmallVector<MBBInfo, 8> BlockInfo; // NOTE(review): presumably indexed by basic-block number -- confirm
     233             : 
     234             :     std::function<bool(const Function &)> PredicateFtor; // Set from the constructor argument; its use is outside the code visible here
     235             :   };
     236             : 
     237             :   char Thumb2SizeReduce::ID = 0;
     238             : 
     239             : } // end anonymous namespace
     240             : 
     241      342570 : INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
     242             :                 false)
     243             : 
     244        5215 : Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) // Builds the wide-opcode -> ReduceTable index map
     245        5215 :     : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
     246        5215 :   OptimizeSize = MinimizeSize = false; // Both size modes start disabled
     247      641445 :   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { // Index every table row by its wide opcode
     248      318115 :     unsigned FromOpc = ReduceTable[i].WideOpc;
     249      636230 :     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) // insert() reports false if the opcode is already mapped
     250           0 :       llvm_unreachable("Duplicated entries?");
     251             :   }
     252        5215 : }
     253             : 
     254             : static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { // True iff CPSR is among MCID's implicit defs
     255         856 :   for (const MCPhysReg *Regs = MCID.getImplicitDefs(); Regs && *Regs; ++Regs) // List is null when there are no implicit defs, else 0-terminated
     256         856 :     if (*Regs == ARM::CPSR)
     257             :       return true;
     258             :   return false; // List absent or exhausted without finding CPSR
     259             : }
     260             : 
     261             : // Check for a likely high-latency flag def, judged purely by Def's opcode.
     262             : static bool isHighLatencyCPSR(MachineInstr *Def) {
     263       12886 :   switch(Def->getOpcode()) {
     264             :   case ARM::FMSTAT: // These two opcodes are the only ones treated as high latency
     265             :   case ARM::tMUL:
     266             :     return true;
     267             :   }
     268             :   return false; // Every other opcode
     269             : }
     270             : 
     271             : /// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
     272             : /// the 's' 16-bit instructions only partially update CPSR. Abort the
     273             : /// transformation to avoid adding a false dependency on the last CPSR-setting
     274             : /// instruction, which hurts the ability of the out-of-order execution engine
     275             : /// to do register renaming magic. Returns true if narrowing would add one.
     276             : /// This function checks if there is a read-after-write dependency between the
     277             : /// last instruction that defines the CPSR and the current instruction. If there
     278             : /// is, then there is no harm done since the instruction cannot be retired
     279             : /// before the CPSR setting instruction anyway.
     280             : /// Note, we are not doing full dependency analysis here for the sake of compile
     281             : /// time. We're not looking for cases like:
     282             : /// r0 = muls ...
     283             : /// r1 = add.w r0, ...
     284             : /// ...
     285             : ///    = mul.w r1
     286             : /// In this case it would have been ok to narrow the mul.w to muls since there
     287             : /// is an indirect RAW dependency between the muls and the mul.w.
     288             : bool
     289        1749 : Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { // The in-class declaration names the flag IsSelfLoop
     290             :   // Disable the check for -Oz (aka OptimizeForSizeHarder).
     291        1749 :   if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
     292             :     return false;
     293             : 
     294          92 :   if (!CPSRDef)
     295             :     // If this BB loops back to itself, conservatively avoid narrowing the
     296             :     // first instruction that does partial flag update.
     297          68 :     return HighLatencyCPSR || FirstInSelfLoop;
     298             : 
     299          24 :   SmallSet<unsigned, 2> Defs; // Registers other than CPSR that the last CPSR-setting instruction defines
     300         286 :   for (const MachineOperand &MO : CPSRDef->operands()) {
     301         389 :     if (!MO.isReg() || MO.isUndef() || MO.isUse()) // Keep only register defs
     302         192 :       continue;
     303          47 :     unsigned Reg = MO.getReg();
     304          71 :     if (Reg == 0 || Reg == ARM::CPSR)
     305          24 :       continue;
     306          23 :     Defs.insert(Reg);
     307             :   }
     308             : 
     309         234 :   for (const MachineOperand &MO : Use->operands()) { // A use of any register in Defs is a real RAW dependency
     310         319 :     if (!MO.isReg() || MO.isUndef() || MO.isDef()) // Keep only register uses
     311          59 :       continue;
     312          51 :     unsigned Reg = MO.getReg();
     313          51 :     if (Defs.count(Reg))
     314           5 :       return false;
     315             :   }
     316             : 
     317             :   // If the current CPSR has high latency, try to avoid the false dependency.
     318          19 :   if (HighLatencyCPSR)
     319             :     return true;
     320             : 
     321             :   // tMOVi8 usually doesn't start long dependency chains, and there are a lot
     322             :   // of them, so always shrink them when CPSR doesn't have high latency.
     323          34 :   if (Use->getOpcode() == ARM::t2MOVi ||
     324             :       Use->getOpcode() == ARM::t2MOVi16)
     325             :     return false;
     326             : 
     327             :   // No read-after-write dependency. The narrowing will add false dependency.
     328             :   return true;
     329             : }
     330             : 
     331             : bool // Returns true if MI's predicate / CPSR-def state is compatible with the selected narrow form
     332        4622 : Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
     333             :                                   bool is2Addr, ARMCC::CondCodes Pred,
     334             :                                   bool LiveCPSR, bool &HasCC, bool &CCDead) { // HasCC and CCDead are in/out
     335        4622 :   if ((is2Addr  && Entry.PredCC2 == 0) || // PredCC == 0: narrow form sets CPSR exactly when it is not predicated
     336        3658 :       (!is2Addr && Entry.PredCC1 == 0)) {
     337        2911 :     if (Pred == ARMCC::AL) {
     338             :       // Not predicated, must set CPSR.
     339        2449 :       if (!HasCC) {
     340             :         // Original instruction was not setting CPSR, but CPSR is not
     341             :         // currently live anyway. It's ok to set it. The CPSR def is
     342             :         // dead though.
     343        2238 :         if (!LiveCPSR) {
     344        2120 :           HasCC = true;
     345        2120 :           CCDead = true;
     346        2120 :           return true;
     347             :         }
     348             :         return false; // CPSR is live: setting it would clobber a value still in use
     349             :       }
     350             :     } else {
     351             :       // Predicated, must not set CPSR.
     352         462 :       if (HasCC)
     353             :         return false;
     354             :     }
     355        1711 :   } else if ((is2Addr  && Entry.PredCC2 == 2) || // PredCC == 2: narrow form always sets CPSR
     356        1244 :              (!is2Addr && Entry.PredCC1 == 2)) {
     357             :     /// Old opcode has an optional def of CPSR.
     358         856 :     if (HasCC)
     359             :       return true;
     360             :     // If old opcode does not implicitly define CPSR, then it's not ok since
     361             :     // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
     362        1712 :     if (!HasImplicitCPSRDef(MI->getDesc()))
     363             :       return false;
     364         856 :     HasCC = true; // Report to the caller that the narrow form defines CPSR
     365             :   } else {
     366             :     // 16-bit instruction does not set CPSR.
     367         855 :     if (HasCC)
     368             :       return false;
     369             :   }
     370             : 
     371             :   return true;
     372             : }
     373             : 
     374        9146 : static bool VerifyLowRegs(MachineInstr *MI) { // True if each explicit register operand is a low register or an exception below
     375        9146 :   unsigned Opc = MI->getOpcode();
     376        9146 :   bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD); // PC is tolerated for these opcodes
     377        9146 :   bool isLROk = (Opc == ARM::t2STMDB_UPD); // LR is tolerated for this opcode
     378        9146 :   bool isSPOk = isPCOk || isLROk; // SP is tolerated wherever PC or LR is
     379       52621 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     380       45104 :     const MachineOperand &MO = MI->getOperand(i);
     381       76724 :     if (!MO.isReg() || MO.isImplicit()) // Only explicit register operands are checked
     382       14294 :       continue;
     383       30810 :     unsigned Reg = MO.getReg();
     384       30810 :     if (Reg == 0 || Reg == ARM::CPSR) // No register, or CPSR: nothing to check
     385        7928 :       continue;
     386       22882 :     if (isPCOk && Reg == ARM::PC)
     387         913 :       continue;
     388       21969 :     if (isLROk && Reg == ARM::LR)
     389         922 :       continue;
     390       21047 :     if (Reg == ARM::SP) {
     391        6668 :       if (isSPOk)
     392        4400 :         continue;
     393        2268 :       if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
     394             :         // Special case for these ldr / str with sp as base register.
     395        2188 :         continue;
     396             :     }
     397             :     if (!isARMLowRegister(Reg)) // Any other register must be a low register
     398             :       return false;
     399             :   }
     400             :   return true;
     401             : }
     402             : 
     403             : bool
     404        6294 : Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     405             :                                   const ReduceEntry &Entry) {
     406        6294 :   if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
     407             :     return false;
     408             : 
     409             :   unsigned Scale = 1;
     410             :   bool HasImmOffset = false;
     411             :   bool HasShift = false;
     412             :   bool HasOffReg = true;
     413             :   bool isLdStMul = false;
     414        6294 :   unsigned Opc = Entry.NarrowOpc1;
     415             :   unsigned OpNum = 3; // First 'rest' of operands.
     416        6294 :   uint8_t  ImmLimit = Entry.Imm1Limit;
     417             : 
     418        6294 :   switch (Entry.WideOpc) {
     419           0 :   default:
     420           0 :     llvm_unreachable("Unexpected Thumb2 load / store opcode!");
     421        3734 :   case ARM::t2LDRi12:
     422             :   case ARM::t2STRi12:
     423        3734 :     if (MI->getOperand(1).getReg() == ARM::SP) {
     424        2188 :       Opc = Entry.NarrowOpc2;
     425        2188 :       ImmLimit = Entry.Imm2Limit;
     426             :     }
     427             : 
     428             :     Scale = 4;
     429             :     HasImmOffset = true;
     430             :     HasOffReg = false;
     431             :     break;
     432             :   case ARM::t2LDRBi12:
     433             :   case ARM::t2STRBi12:
     434             :     HasImmOffset = true;
     435             :     HasOffReg = false;
     436             :     break;
     437         157 :   case ARM::t2LDRHi12:
     438             :   case ARM::t2STRHi12:
     439             :     Scale = 2;
     440             :     HasImmOffset = true;
     441             :     HasOffReg = false;
     442             :     break;
     443         129 :   case ARM::t2LDRs:
     444             :   case ARM::t2LDRBs:
     445             :   case ARM::t2LDRHs:
     446             :   case ARM::t2LDRSBs:
     447             :   case ARM::t2LDRSHs:
     448             :   case ARM::t2STRs:
     449             :   case ARM::t2STRBs:
     450             :   case ARM::t2STRHs:
     451             :     HasShift = true;
     452             :     OpNum = 4;
     453             :     break;
     454          88 :   case ARM::t2LDR_POST:
     455             :   case ARM::t2STR_POST: {
     456         176 :     if (!MBB.getParent()->getFunction().optForMinSize())
     457             :       return false;
     458             : 
     459          16 :     if (!MI->hasOneMemOperand() ||
     460           8 :         (*MI->memoperands_begin())->getAlignment() < 4)
     461             :       return false;
     462             : 
     463             :     // We're creating a completely different type of load/store - LDM from LDR.
     464             :     // For this reason we can't reuse the logic at the end of this function; we
     465             :     // have to implement the MI building here.
     466           6 :     bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
     467          12 :     unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
     468          12 :     unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
     469           6 :     unsigned Offset = MI->getOperand(3).getImm();
     470           6 :     unsigned PredImm = MI->getOperand(4).getImm();
     471           6 :     unsigned PredReg = MI->getOperand(5).getReg();
     472             :     assert(isARMLowRegister(Rt));
     473             :     assert(isARMLowRegister(Rn));
     474             : 
     475           6 :     if (Offset != 4)
     476             :       return false;
     477             : 
     478             :     // Add the 16-bit load / store instruction.
     479             :     DebugLoc dl = MI->getDebugLoc();
     480          12 :     auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
     481           4 :                    .addReg(Rn, RegState::Define)
     482           4 :                    .addReg(Rn)
     483             :                    .addImm(PredImm)
     484           4 :                    .addReg(PredReg)
     485           4 :                    .addReg(Rt, IsStore ? 0 : RegState::Define);
     486             : 
     487             :     // Transfer memoperands.
     488           4 :     MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     489             : 
     490             :     // Transfer MI flags.
     491           4 :     MIB.setMIFlags(MI->getFlags());
     492             : 
     493             :     // Kill the old instruction.
     494           4 :     MI->eraseFromBundle();
     495             :     ++NumLdSts;
     496             :     return true;
     497             :   }
     498          77 :   case ARM::t2LDMIA: {
     499          77 :     unsigned BaseReg = MI->getOperand(0).getReg();
     500             :     assert(isARMLowRegister(BaseReg));
     501             : 
     502             :     // For the non-writeback version (this one), the base register must be
     503             :     // one of the registers being loaded.
     504             :     bool isOK = false;
     505         523 :     for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
     506         299 :       if (MI->getOperand(i).getReg() == BaseReg) {
     507             :         isOK = true;
     508             :         break;
     509             :       }
     510             :     }
     511             : 
     512          77 :     if (!isOK)
     513             :       return false;
     514             : 
     515             :     OpNum = 0;
     516             :     isLdStMul = true;
     517             :     break;
     518             :   }
     519           6 :   case ARM::t2STMIA:
     520             :     // If the base register is killed, we don't care what its value is after the
     521             :     // instruction, so we can use an updating STMIA.
     522          12 :     if (!MI->getOperand(0).isKill())
     523             :       return false;
     524             : 
     525             :     break;
     526         913 :   case ARM::t2LDMIA_RET: {
     527         913 :     unsigned BaseReg = MI->getOperand(1).getReg();
     528         913 :     if (BaseReg != ARM::SP)
     529             :       return false;
     530         913 :     Opc = Entry.NarrowOpc2; // tPOP_RET
     531             :     OpNum = 2;
     532             :     isLdStMul = true;
     533             :     break;
     534             :   }
     535         930 :   case ARM::t2LDMIA_UPD:
     536             :   case ARM::t2STMIA_UPD:
     537             :   case ARM::t2STMDB_UPD: {
     538             :     OpNum = 0;
     539             : 
     540         930 :     unsigned BaseReg = MI->getOperand(1).getReg();
     541        1856 :     if (BaseReg == ARM::SP &&
     542         926 :         (Entry.WideOpc == ARM::t2LDMIA_UPD ||
     543             :          Entry.WideOpc == ARM::t2STMDB_UPD)) {
     544         926 :       Opc = Entry.NarrowOpc2; // tPOP or tPUSH
     545             :       OpNum = 2;
     546           4 :     } else if (!isARMLowRegister(BaseReg) ||
     547           4 :                (Entry.WideOpc != ARM::t2LDMIA_UPD &&
     548             :                 Entry.WideOpc != ARM::t2STMIA_UPD)) {
     549             :       return false;
     550             :     }
     551             : 
     552             :     isLdStMul = true;
     553             :     break;
     554             :   }
     555             :   }
     556             : 
     557             :   unsigned OffsetReg = 0;
     558             :   bool OffsetKill = false;
     559             :   bool OffsetInternal = false;
     560        6205 :   if (HasShift) {
     561         129 :     OffsetReg  = MI->getOperand(2).getReg();
     562             :     OffsetKill = MI->getOperand(2).isKill();
     563             :     OffsetInternal = MI->getOperand(2).isInternalRead();
     564             : 
     565         129 :     if (MI->getOperand(3).getImm())
     566             :       // Thumb1 addressing mode doesn't support shift.
     567             :       return false;
     568             :   }
     569             : 
     570             :   unsigned OffsetImm = 0;
     571        6158 :   if (HasImmOffset) {
     572        4151 :     OffsetImm = MI->getOperand(2).getImm();
     573        4151 :     unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
     574             : 
     575        4151 :     if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
     576             :       // Make sure the immediate field fits.
     577             :       return false;
     578             :   }
     579             : 
     580             :   // Add the 16-bit load / store instruction.
     581             :   DebugLoc dl = MI->getDebugLoc();
     582       10664 :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
     583             : 
     584             :   // tSTMIA_UPD takes a defining register operand. We've already checked that
     585             :   // the register is killed, so mark it as dead here.
     586        5332 :   if (Entry.WideOpc == ARM::t2STMIA)
     587           6 :     MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
     588             : 
     589        5332 :   if (!isLdStMul) {
     590        3413 :     MIB.add(MI->getOperand(0));
     591        3413 :     MIB.add(MI->getOperand(1));
     592             : 
     593        3413 :     if (HasImmOffset)
     594        3325 :       MIB.addImm(OffsetImm / Scale);
     595             : 
     596             :     assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
     597             : 
     598        3413 :     if (HasOffReg)
     599          88 :       MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
     600          88 :                             getInternalReadRegState(OffsetInternal));
     601             :   }
     602             : 
     603             :   // Transfer the rest of operands.
     604       22121 :   for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
     605       16789 :     MIB.add(MI->getOperand(OpNum));
     606             : 
     607             :   // Transfer memoperands.
     608        5332 :   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     609             : 
     610             :   // Transfer MI flags.
     611        5332 :   MIB.setMIFlags(MI->getFlags());
     612             : 
     613             :   LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
     614             :                     << "       to 16-bit: " << *MIB);
     615             : 
     616             :   MBB.erase_instr(MI);
     617             :   ++NumLdSts;
     618             :   return true;
     619             : }
     620             : /* Try to narrow MI for the opcodes this pass handles as special cases.
                     :  *  - t2ADDri: with a non-SP source register it goes through ReduceTo2Addr
                     :  *    and then ReduceToNarrow; with an SP source it is rewritten to
                     :  *    tADDrSPi when the checks below pass.
                     :  *  - Anything that may load or store is handed to ReduceLoadStore.
                     :  *  - The opcodes in the switch are forwarded to ReduceTo2Addr and/or
                     :  *    ReduceToNarrow once their extra conditions hold.
                     :  * Returns true when MI was replaced by a narrower instruction, false
                     :  * otherwise. */
     621             : bool
     622       11783 : Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     623             :                                 const ReduceEntry &Entry,
     624             :                                 bool LiveCPSR, bool IsSelfLoop) {
     625       11783 :   unsigned Opc = MI->getOpcode();
     626       11783 :   if (Opc == ARM::t2ADDri) {
     627             :     // If the source register is SP, try to reduce to tADDrSPi, otherwise
     628             :     // it's a normal reduce.
     629        2449 :     if (MI->getOperand(1).getReg() != ARM::SP) {
     630         770 :       if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     631             :         return true;
     632         607 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     633             :     }
     634             :     // Try to reduce to tADDrSPi.
     635        1679 :     unsigned Imm = MI->getOperand(2).getImm();
     636             :     // The immediate must be in range, the destination register must be a low
     637             :     // reg, the predicate must be "always" and the condition flags must not
     638             :     // be being set.
     639        1679 :     if (Imm & 3 || Imm > 1020) // Must be a multiple of 4 and at most 1020 (255 * 4).
     640             :       return false;
     641         850 :     if (!isARMLowRegister(MI->getOperand(0).getReg()))
     642             :       return false;
     643         655 :     if (MI->getOperand(3).getImm() != ARMCC::AL) // Operand 3 is read as the condition code.
     644             :       return false;
     645             :     const MCInstrDesc &MCID = MI->getDesc();
     646        1965 :     if (MCID.hasOptionalDef() &&
     647        1965 :         MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) // Last declared operand naming CPSR is taken as "sets flags".
     648             :       return false;
     649             : 
     650             :     MachineInstrBuilder MIB =
     651             :         BuildMI(MBB, MI, MI->getDebugLoc(),
     652         654 :                 TII->get(ARM::tADDrSPi))
     653         654 :             .add(MI->getOperand(0))
     654         654 :             .add(MI->getOperand(1))
     655         654 :             .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
     656         654 :             .add(predOps(ARMCC::AL));
     657             : 
     658             :     // Transfer MI flags.
     659         654 :     MIB.setMIFlags(MI->getFlags());
     660             : 
     661             :     LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
     662             :                       << "       to 16-bit: " << *MIB);
     663             : 
     664             :     MBB.erase_instr(MI); // The new instruction was inserted before MI; drop the wide original.
     665             :     ++NumNarrows;
     666         654 :     return true;
     667             :   }
     668             : 
     669        9334 :   if (Entry.LowRegs1 && !VerifyLowRegs(MI)) // Entries flagged LowRegs1 must pass VerifyLowRegs before going further.
     670             :     return false;
     671             : 
     672        7705 :   if (MI->mayLoadOrStore())
     673        6294 :     return ReduceLoadStore(MBB, MI, Entry);
     674             : 
     675        1411 :   switch (Opc) {
     676             :   default: break;
     677           0 :   case ARM::t2ADDSri:
     678             :   case ARM::t2ADDSrr: {
     679           0 :     unsigned PredReg = 0;
     680           0 :     if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) { // Only unpredicated (AL) forms are attempted.
     681           0 :       switch (Opc) {
     682             :       default: break;
     683           0 :       case ARM::t2ADDSri:
     684           0 :         if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     685           0 :           return true;
     686             :         LLVM_FALLTHROUGH; // The immediate form falls back to the same ReduceToNarrow attempt as the register form.
     687             :       case ARM::t2ADDSrr:
     688           0 :         return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     689             :       }
     690             :     }
     691           0 :     break;
     692             :   }
     693         324 :   case ARM::t2RSBri:
     694             :   case ARM::t2RSBSri:
     695             :   case ARM::t2SXTB:
     696             :   case ARM::t2SXTH:
     697             :   case ARM::t2UXTB:
     698             :   case ARM::t2UXTH:
     699         324 :     if (MI->getOperand(2).getImm() == 0) // Only a zero immediate in operand 2 is reducible for these opcodes.
     700         312 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     701             :     break;
     702         899 :   case ARM::t2MOVi16:
     703             :     // Can convert only 'pure' immediate operands, not immediates obtained as
     704             :     // globals' addresses.
     705        1798 :     if (MI->getOperand(1).isImm())
     706         591 :       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     707             :     break;
     708         188 :   case ARM::t2CMPrr: {
     709             :     // Try to reduce to the lo-reg only version first. Why there are two
     710             :     // versions of the instruction is a mystery.
     711             :     // It would be nice to just have two entries in the master table that
     712             :     // are prioritized, but the table assumes a unique entry for each
     713             :     // source insn opcode. So for now, we hack a local entry record to use.
     714             :     static const ReduceEntry NarrowEntry =
     715             :       { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; // NOTE(review): field order follows ReduceEntry, which is declared outside this view - confirm before editing.
     716         188 :     if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
     717             :       return true;
     718          19 :     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); // Fall back to the table's own entry.
     719             :   }
     720             :   }
     721             :   return false;
     722             : }
     723             : /* Try to replace MI with the 16-bit two-address instruction
                     :  * Entry.NarrowOpc2. The destination (operand 0) has to coincide with a
                     :  * source operand; when it does not, the operands are commuted if possible.
                     :  * After the register / immediate / predicate / CPSR checks pass, the new
                     :  * instruction is built in front of MI, MI is erased and Num2Addrs is
                     :  * incremented. Returns true on replacement, false otherwise.
                     :  * NOTE(review): commuteInstruction is called on *MI itself, so MI may
                     :  * presumably stay commuted when a later check returns false - confirm. */
     724             : bool
     725        2126 : Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     726             :                                 const ReduceEntry &Entry,
     727             :                                 bool LiveCPSR, bool IsSelfLoop) {
     728        2138 :   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) // -1 disables the limit check.
     729             :     return false;
     730             : 
     731        2120 :   if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
     732             :     // Don't issue movs with shifter operand for some CPUs unless we
     733             :     // are optimizing for size.
     734             :     return false;
     735             : 
     736        2117 :   unsigned Reg0 = MI->getOperand(0).getReg();
     737        2117 :   unsigned Reg1 = MI->getOperand(1).getReg();
     738             :   // t2MUL is "special". The tied source operand is second, not first.
     739        4234 :   if (MI->getOpcode() == ARM::t2MUL) {
     740          89 :     unsigned Reg2 = MI->getOperand(2).getReg();
     741             :     // Early exit if the regs aren't all low regs.
     742             :     if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
     743             :         || !isARMLowRegister(Reg2))
     744             :       return false;
     745          83 :     if (Reg0 != Reg2) {
     746             :       // If the other operand also isn't the same as the destination, we
     747             :       // can't reduce.
     748          41 :       if (Reg1 != Reg0)
     749             :         return false;
     750             :       // Try to commute the operands to make it a 2-address instruction.
     751          36 :       MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
     752          36 :       if (!CommutedMI)
     753             :         return false;
     754             :     }
     755        2028 :   } else if (Reg0 != Reg1) {
     756             :     // Try to commute the operands to make it a 2-address instruction.
     757        1214 :     unsigned CommOpIdx1 = 1;
     758        1214 :     unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; // Let the target pick the partner for operand 1.
     759        1693 :     if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
     760         958 :         MI->getOperand(CommOpIdx2).getReg() != Reg0) // Commuting only helps if the partner operand holds the destination register.
     761         928 :       return false;
     762             :     MachineInstr *CommutedMI =
     763         286 :         TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
     764         286 :     if (!CommutedMI)
     765             :       return false;
     766             :   }
     767        1178 :   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
     768             :     return false;
     769        1044 :   if (Entry.Imm2Limit) { // Non-zero Imm2Limit: operand 2 is read as an immediate of at most that many bits.
     770         330 :     unsigned Imm = MI->getOperand(2).getImm();
     771         330 :     unsigned Limit = (1 << Entry.Imm2Limit) - 1;
     772         330 :     if (Imm > Limit)
     773             :       return false;
     774             :   } else { // Otherwise operand 2 is read as a register.
     775         714 :     unsigned Reg2 = MI->getOperand(2).getReg();
     776         714 :     if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
     777             :       return false;
     778             :   }
     779             : 
     780             :   // Check if it's possible / necessary to transfer the predicate.
     781         964 :   const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
     782         964 :   unsigned PredReg = 0;
     783         964 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
     784             :   bool SkipPred = false;
     785         964 :   if (Pred != ARMCC::AL) {
     786          68 :     if (!NewMCID.isPredicable())
     787             :       // Can't transfer predicate, fail.
     788             :       return false;
     789             :   } else {
     790        1860 :     SkipPred = !NewMCID.isPredicable(); // MI is AL and the narrow form takes no predicate: drop predicate operands when copying.
     791             :   }
     792             : 
     793         964 :   bool HasCC = false;
     794         964 :   bool CCDead = false;
     795         964 :   const MCInstrDesc &MCID = MI->getDesc();
     796        1928 :   if (MCID.hasOptionalDef()) {
     797         886 :     unsigned NumOps = MCID.getNumOperands();
     798        1772 :     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); // The last declared operand is inspected for a CPSR def.
     799        1010 :     if (HasCC && MI->getOperand(NumOps-1).isDead())
     800           0 :       CCDead = true;
     801             :   }
     802         964 :   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
     803             :     return false;
     804             : 
     805             :   // Avoid adding a false dependency on partial flag update by some 16-bit
     806             :   // instructions which has the 's' bit set.
     807        1306 :   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
     808         193 :       canAddPseudoFlagDep(MI, IsSelfLoop))
     809             :     return false;
     810             : 
     811             :   // Add the 16-bit instruction.
     812             :   DebugLoc dl = MI->getDebugLoc();
     813             :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
     814         911 :   MIB.add(MI->getOperand(0));
     815        1822 :   if (NewMCID.hasOptionalDef())
     816         958 :     MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // The narrow form's optional def follows the destination operand.
     817             : 
     818             :   // Transfer the rest of operands.
     819         911 :   unsigned NumOps = MCID.getNumOperands();
     820        5491 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
     821        5418 :     if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) // The wide form's optional def was already handled above.
     822         838 :       continue;
     823        3742 :     if (SkipPred && MCID.OpInfo[i].isPredicate()) // NOTE(review): no `i < NumOps` guard here, unlike the check above and unlike ReduceToNarrow, although i runs up to MI->getNumOperands(); confirm OpInfo[i] is in range when SkipPred is set.
     824           0 :       continue;
     825        3742 :     MIB.add(MI->getOperand(i));
     826             :   }
     827             : 
     828             :   // Transfer MI flags.
     829         911 :   MIB.setMIFlags(MI->getFlags());
     830             : 
     831             :   LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
     832             :                     << "       to 16-bit: " << *MIB);
     833             : 
     834             :   MBB.erase_instr(MI);
     835             :   ++Num2Addrs;
     836             :   return true;
     837             : }
     838             : /* Try to replace MI with the 16-bit instruction Entry.NarrowOpc1 (the
                     :  * non-two-address form). Every declared non-predicate operand is checked
                     :  * first: registers against Entry.LowRegs1, immediates against the limit
                     :  * derived from Entry.Imm1Limit. After the predicate / CPSR checks pass,
                     :  * the new instruction is built in front of MI, MI is erased and
                     :  * NumNarrows is incremented. Returns true on replacement, false
                     :  * otherwise. */
     839             : bool
     840        5397 : Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
     841             :                                  const ReduceEntry &Entry,
     842             :                                  bool LiveCPSR, bool IsSelfLoop) {
     843        5407 :   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) // -1 disables the limit check.
     844             :     return false;
     845             : 
     846        5392 :   if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
     847             :     // Don't issue movs with shifter operand for some CPUs unless we
     848             :     // are optimizing for size.
     849             :     return false;
     850             : 
     851             :   unsigned Limit = ~0U; // With Imm1Limit == 0 no immediate can exceed this, so the check below never rejects.
     852        5383 :   if (Entry.Imm1Limit)
     853        4486 :     Limit = (1 << Entry.Imm1Limit) - 1;
     854             : 
     855        5383 :   const MCInstrDesc &MCID = MI->getDesc();
     856       49294 :   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { // Only the declared operands are checked.
     857       20989 :     if (MCID.OpInfo[i].isPredicate())
     858        7316 :       continue;
     859       13673 :     const MachineOperand &MO = MI->getOperand(i);
     860       13673 :     if (MO.isReg()) {
     861        9509 :       unsigned Reg = MO.getReg();
     862        9509 :       if (!Reg || Reg == ARM::CPSR) // Register 0 and CPSR are exempt from the low-register requirement.
     863        2388 :         continue;
     864        7121 :       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
     865             :         return false;
     866        4164 :     } else if (MO.isImm() &&
     867             :                !MCID.OpInfo[i].isPredicate()) { // Predicate operands were already skipped above, so this second test is always true here.
     868        4164 :       if (((unsigned)MO.getImm()) > Limit)
     869             :         return false;
     870             :     }
     871             :   }
     872             : 
     873             :   // Check if it's possible / necessary to transfer the predicate.
     874        3658 :   const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
     875        3658 :   unsigned PredReg = 0;
     876        3658 :   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
     877             :   bool SkipPred = false;
     878        3658 :   if (Pred != ARMCC::AL) {
     879        1110 :     if (!NewMCID.isPredicable())
     880             :       // Can't transfer predicate, fail.
     881             :       return false;
     882             :   } else {
     883        6206 :     SkipPred = !NewMCID.isPredicable(); // MI is AL and the narrow form takes no predicate: drop predicate operands when copying.
     884             :   }
     885             : 
     886        3658 :   bool HasCC = false;
     887        3658 :   bool CCDead = false;
     888        7316 :   if (MCID.hasOptionalDef()) {
     889        2388 :     unsigned NumOps = MCID.getNumOperands();
     890        4776 :     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); // The last declared operand is inspected for a CPSR def.
     891        2510 :     if (HasCC && MI->getOperand(NumOps-1).isDead())
     892           0 :       CCDead = true;
     893             :   }
     894        3658 :   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
     895             :     return false;
     896             : 
     897             :   // Avoid adding a false dependency on partial flag update by some 16-bit
     898             :   // instructions which has the 's' bit set.
     899        7083 :   if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
     900        1556 :       canAddPseudoFlagDep(MI, IsSelfLoop))
     901             :     return false;
     902             : 
     903             :   // Add the 16-bit instruction.
     904             :   DebugLoc dl = MI->getDebugLoc();
     905             :   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
     906        3538 :   MIB.add(MI->getOperand(0));
     907        7076 :   if (NewMCID.hasOptionalDef())
     908        4588 :     MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // The narrow form's optional def follows the destination operand.
     909             : 
     910             :   // Transfer the rest of operands.
     911        3538 :   unsigned NumOps = MCID.getNumOperands();
     912       19126 :   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { // Walks all operands of MI, so i may go past the declared NumOps.
     913       17868 :     if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
     914        2280 :       continue;
     915       26928 :     if ((MCID.getOpcode() == ARM::t2RSBSri ||
     916       13202 :          MCID.getOpcode() == ARM::t2RSBri ||
     917       13052 :          MCID.getOpcode() == ARM::t2SXTB ||
     918       12868 :          MCID.getOpcode() == ARM::t2SXTH ||
     919       12676 :          MCID.getOpcode() == ARM::t2UXTB ||
     920       14564 :          MCID.getOpcode() == ARM::t2UXTH) && i == 2)
     921             :       // Skip the zero immediate operand, it's now implicit.
     922         312 :       continue;
     923       24170 :     bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); // Guarded so OpInfo is only indexed for declared operands.
     924       12996 :     if (SkipPred && isPred)
     925           0 :         continue;
     926       12996 :     const MachineOperand &MO = MI->getOperand(i);
     927       20565 :     if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
     928             :       // Skip implicit def of CPSR. Either it's modeled as an optional
     929             :       // def now or it's already an implicit def on the new instruction.
     930         856 :       continue;
     931             :     MIB.add(MO);
     932             :   }
     933        7076 :   if (!MCID.isPredicable() && NewMCID.isPredicable()) // The wide form carried no predicate to copy, so give the narrow form an explicit AL one.
     934           0 :     MIB.add(predOps(ARMCC::AL));
     935             : 
     936             :   // Transfer MI flags.
     937        3538 :   MIB.setMIFlags(MI->getFlags());
     938             : 
     939             :   LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
     940             :                     << "       to 16-bit: " << *MIB);
     941             : 
     942             :   MBB.erase_instr(MI);
     943             :   ++NumNarrows;
     944             :   return true;
     945             : }
     946             : /* Scan MI's operands for register definitions of CPSR. DefCPSR is set to
                     :  * true when at least one is found (it is never reset to false here).
                     :  * Returns true when some CPSR def is not marked dead, or when LiveCPSR
                     :  * was already true on entry. */
     947      145376 : static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
     948             :   bool HasDef = false;
     949     1488472 :   for (const MachineOperand &MO : MI.operands()) {
     950     1568145 :     if (!MO.isReg() || MO.isUndef() || MO.isUse()) // Keep only non-undef register defs.
     951      532766 :       continue;
     952      138782 :     if (MO.getReg() != ARM::CPSR)
     953      132289 :       continue;
     954             : 
     955        6493 :     DefCPSR = true;
     956        6493 :     if (!MO.isDead()) // A dead def does not make CPSR live.
     957             :       HasDef = true;
     958             :   }
     959             : 
     960      145376 :   return HasDef || LiveCPSR;
     961             : }
     962             : /* Scan MI's operands for register uses of CPSR. Returns false as soon as
                     :  * a use carrying the kill flag is found; otherwise returns LiveCPSR
                     :  * unchanged. In asserts-enabled builds any CPSR use while LiveCPSR is
                     :  * false trips the assertion below. */
     963      145376 : static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
     964     1479720 :   for (const MachineOperand &MO : MI.operands()) {
     965     1565938 :     if (!MO.isReg() || MO.isUndef() || MO.isDef()) // Keep only non-undef register uses.
     966      361435 :       continue;
     967      309634 :     if (MO.getReg() != ARM::CPSR)
     968      305065 :       continue;
     969             :     assert(LiveCPSR && "CPSR liveness tracking is wrong!");
     970        4569 :     if (MO.isKill()) { // Last use: CPSR is no longer live after MI.
     971             :       LiveCPSR = false;
     972             :       break;
     973             :     }
     974             :   }
     975             : 
     976      145376 :   return LiveCPSR;
     977             : }
     978             : /* Try to narrow a single instruction. MI's opcode is looked up in
                     :  * ReduceOpcodeMap; opcodes without an entry are rejected. Entries flagged
                     :  * Special go to ReduceSpecial only. All others try ReduceTo2Addr (when
                     :  * NarrowOpc2 is non-zero) and then ReduceToNarrow (when NarrowOpc1 is
                     :  * non-zero). Returns true when one of the reducers succeeded. */
     979      145376 : bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
     980             :                                 bool LiveCPSR, bool IsSelfLoop) {
     981      290752 :   unsigned Opcode = MI->getOpcode();
     982      145376 :   DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
     983      145376 :   if (OPI == ReduceOpcodeMap.end())
     984             :     return false;
     985       16422 :   const ReduceEntry &Entry = ReduceTable[OPI->second]; // The map stores an index into ReduceTable.
     986             : 
     987             :   // Don't attempt normal reductions on "special" cases for now.
     988       16422 :   if (Entry.Special)
     989       11783 :     return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
     990             : 
     991             :   // Try to transform to a 16-bit two-address instruction.
     992        5995 :   if (Entry.NarrowOpc2 &&
     993        1356 :       ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     994             :     return true;
     995             : 
     996             :   // Try to transform to a 16-bit non-two-address instruction.
     997        7571 :   if (Entry.NarrowOpc1 &&
     998        3680 :       ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
     999             :     return true;
    1000             : 
    1001             :   return false;
    1002             : }
    1003             : /* Run the reduction over every instruction of MBB. While walking the
                     :  * block it tracks whether CPSR is live (LiveCPSR), the most recent
                     :  * CPSR-defining instruction (CPSRDef) and whether that def is high latency
                     :  * (HighLatencyCPSR). On exit the block's BlockInfo slot records
                     :  * HighLatencyCPSR and is marked Visited. Returns true when at least one
                     :  * instruction was reduced. */
    1004       17424 : bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
    1005             :   bool Modified = false;
    1006             : 
    1007             :   // Yes, CPSR could be livein.
    1008       17424 :   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
    1009             :   MachineInstr *BundleMI = nullptr; // Most recent BUNDLE header seen in this block, if any.
    1010             : 
    1011       17424 :   CPSRDef = nullptr;
    1012       17424 :   HighLatencyCPSR = false;
    1013             : 
    1014             :   // Check predecessors for the latest CPSRDef.
    1015       23064 :   for (auto *Pred : MBB.predecessors()) {
    1016        5738 :     const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    1017        5738 :     if (!PInfo.Visited) {
    1018             :       // Since blocks are visited in RPO, this must be a back-edge.
    1019        1017 :       continue;
    1020             :     }
    1021        4721 :     if (PInfo.HighLatencyCPSR) { // One visited predecessor with a high-latency CPSR def is enough.
    1022          98 :       HighLatencyCPSR = true;
    1023          98 :       break;
    1024             :     }
    1025             :   }
    1026             : 
    1027             :   // If this BB loops back to itself, conservatively avoid narrowing the
    1028             :   // first instruction that does partial flag update.
    1029       17424 :   bool IsSelfLoop = MBB.isSuccessor(&MBB);
    1030             :   MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
    1031             :   MachineBasicBlock::instr_iterator NextMII;
    1032      163798 :   for (; MII != E; MII = NextMII) {
    1033             :     NextMII = std::next(MII); // Taken up front: a successful reduction erases MI.
    1034             : 
    1035             :     MachineInstr *MI = &*MII;
    1036      146374 :     if (MI->isBundle()) {
    1037             :       BundleMI = MI;
    1038        1844 :       continue;
    1039             :     }
    1040         152 :     if (MI->isDebugInstr())
    1041         152 :       continue;
    1042             : 
    1043      145376 :     LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); // Uses are accounted for before the reduction, defs after it.
    1044             : 
    1045             :     // Does NextMII belong to the same bundle as MI?
    1046      273382 :     bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
    1047             : 
    1048      145376 :     if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
    1049             :       Modified = true;
    1050             :       MachineBasicBlock::instr_iterator I = std::prev(NextMII); // The replacement now sits immediately before NextMII.
    1051             :       MI = &*I;
    1052             :       // Removing and reinserting the first instruction in a bundle will break
    1053             :       // up the bundle. Fix the bundling if it was broken.
    1054       10557 :       if (NextInSameBundle && !NextMII->isBundledWithPred())
    1055           0 :         NextMII->bundleWithPred();
    1056             :     }
    1057             : 
    1058      148143 :     if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) { // MI is the last instruction of its bundle.
    1059             :       // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
    1060             :       // marker is only on the BUNDLE instruction. Process the BUNDLE
    1061             :       // instruction as we finish with the bundled instruction to work around
    1062             :       // the inconsistency.
    1063         846 :       if (BundleMI->killsRegister(ARM::CPSR))
    1064             :         LiveCPSR = false;
    1065             :       MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
    1066          80 :       if (MO && !MO->isDead())
    1067             :         LiveCPSR = true;
    1068             :       MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
    1069        1578 :       if (MO && !MO->isKill())
    1070             :         LiveCPSR = true;
    1071             :     }
    1072             : 
    1073      145376 :     bool DefCPSR = false;
    1074      145376 :     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    1075      145376 :     if (MI->isCall()) {
    1076             :       // Calls don't really set CPSR.
    1077        7294 :       CPSRDef = nullptr;
    1078        7294 :       HighLatencyCPSR = false;
    1079             :       IsSelfLoop = false; // Cleared for the rest of the block once a call has been seen.
    1080      138082 :     } else if (DefCPSR) {
    1081             :       // This is the last CPSR defining instruction.
    1082        6443 :       CPSRDef = MI;
    1083        6443 :       HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
    1084             :       IsSelfLoop = false; // Cleared for the rest of the block once a CPSR def has been seen.
    1085             :     }
    1086             :   }
    1087             : 
    1088       17424 :   MBBInfo &Info = BlockInfo[MBB.getNumber()];
    1089       17424 :   Info.HighLatencyCPSR = HighLatencyCPSR; // Exit state, read by successors visited later.
    1090       17424 :   Info.Visited = true;
    1091       17424 :   return Modified;
    1092             : }
    1093             : /// Runs ReduceMBB over every block of MF in reverse post-order; returns true if any call reported a change, false on the early exits below.
    1094       26260 : bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
    1095       38784 :   if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    1096             :     return false; // A predicate is set and it rejected this function: leave it untouched.
    1097             :   // Cache the ARM subtarget; bail out when isThumb1Only() or prefers32BitThumb() holds.
    1098       14518 :   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
    1099       27922 :   if (STI->isThumb1Only() || STI->prefers32BitThumb())
    1100             :     return false;
    1101             :   // Cache the subtarget's instruction info, downcast to the Thumb2-specific type.
    1102       13403 :   TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
    1103             : 
    1104             :   // Cache the function's optForSize / optForMinSize answers. Minimizing size implies optimizing for size.
    1105       13403 :   OptimizeSize = MF.getFunction().optForSize();
    1106       26806 :   MinimizeSize = MF.getFunction().optForMinSize();
    1107             :   // Reset the per-block records: one slot per block ID (ReduceMBB indexes BlockInfo by MBB.getNumber()).
    1108       13403 :   BlockInfo.clear();
    1109       13403 :   BlockInfo.resize(MF.getNumBlockIDs());
    1110             : 
    1111             :   // Visit blocks in reverse post-order so LastCPSRDef is known for all
    1112             :   // predecessors.
    1113             :   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
    1114             :   bool Modified = false;
    1115             :   for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
    1116       30827 :        I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    1117       17424 :     Modified |= ReduceMBB(**I); // Sticky: stays true once any block reports a change.
    1118             :   return Modified;
    1119             : }
    1120             : 
    1121             : /// createThumb2SizeReductionPass - Returns a newly allocated instance of the Thumb2 size
    1122             : /// reduction pass. Ftor is moved into the pass's constructor (presumably kept as its PredicateFtor - confirm).
    1123        5214 : FunctionPass *llvm::createThumb2SizeReductionPass(
    1124             :     std::function<bool(const Function &)> Ftor) {
    1125       10428 :   return new Thumb2SizeReduce(std::move(Ftor)); // NOTE(review): raw new; ownership presumably passes to the caller - confirm.
    1126      299229 : }

Generated by: LCOV version 1.13