Line data Source code
1 : //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
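// Overview: this pass walks each function and rewrites 32-bit Thumb2
// instructions as equivalent 16-bit Thumb encodings, driven by the
// ReduceTable below, whenever register class, immediate range, predication,
// and CPSR flag behavior permit. Narrowing shrinks code size; the statistics
// and hidden -t2-reduce-limit* options below cap each kind of reduction for
// debugging and bisection.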
9 :
10 : #include "ARM.h"
11 : #include "ARMBaseInstrInfo.h"
12 : #include "ARMSubtarget.h"
13 : #include "MCTargetDesc/ARMBaseInfo.h"
14 : #include "Thumb2InstrInfo.h"
15 : #include "llvm/ADT/DenseMap.h"
16 : #include "llvm/ADT/PostOrderIterator.h"
17 : #include "llvm/ADT/STLExtras.h"
18 : #include "llvm/ADT/SmallSet.h"
19 : #include "llvm/ADT/SmallVector.h"
20 : #include "llvm/ADT/Statistic.h"
21 : #include "llvm/ADT/StringRef.h"
22 : #include "llvm/CodeGen/MachineBasicBlock.h"
23 : #include "llvm/CodeGen/MachineFunction.h"
24 : #include "llvm/CodeGen/MachineFunctionPass.h"
25 : #include "llvm/CodeGen/MachineInstr.h"
26 : #include "llvm/CodeGen/MachineInstrBuilder.h"
27 : #include "llvm/CodeGen/MachineOperand.h"
28 : #include "llvm/CodeGen/TargetInstrInfo.h"
29 : #include "llvm/IR/DebugLoc.h"
30 : #include "llvm/IR/Function.h"
31 : #include "llvm/MC/MCInstrDesc.h"
32 : #include "llvm/MC/MCRegisterInfo.h"
33 : #include "llvm/Support/CommandLine.h"
34 : #include "llvm/Support/Compiler.h"
35 : #include "llvm/Support/Debug.h"
36 : #include "llvm/Support/ErrorHandling.h"
37 : #include "llvm/Support/raw_ostream.h"
38 : #include <algorithm>
39 : #include <cassert>
40 : #include <cstdint>
41 : #include <functional>
42 : #include <iterator>
43 : #include <utility>
44 :
45 : using namespace llvm;
46 :
47 : #define DEBUG_TYPE "t2-reduce-size"
48 : #define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
49 :
50 : STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
51 : STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
52 : STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
53 :
54 : static cl::opt<int> ReduceLimit("t2-reduce-limit",
55 : cl::init(-1), cl::Hidden);
56 : static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
57 : cl::init(-1), cl::Hidden);
58 : static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
59 : cl::init(-1), cl::Hidden);
60 :
61 : namespace {
62 :
63 : /// ReduceTable - A static table with information on mapping from wide
64 : /// opcodes to narrow ones.
65 : struct ReduceEntry {
66 : uint16_t WideOpc; // Wide opcode
67 : uint16_t NarrowOpc1; // Narrow opcode to transform to
68 : uint16_t NarrowOpc2; // Narrow opcode when it's two-address
69 : uint8_t Imm1Limit; // Limit of immediate field (bits)
70 : uint8_t Imm2Limit; // Limit of immediate field when it's two-address
71 : unsigned LowRegs1 : 1; // Only possible if low-registers are used
72 : unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
73 : unsigned PredCC1 : 2; // 0 - Sets CPSR iff not predicated.
74 : // 1 - No cc field.
75 : // 2 - Always set CPSR.
76 : unsigned PredCC2 : 2;
77 : unsigned PartFlag : 1; // 16-bit instruction does partial flag update
78 : unsigned Special : 1; // Needs to be dealt with specially
79 : unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
80 : };
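// How to read an entry, using the t2ADDri row below as an example: the wide
// add-immediate can narrow either to tADDi3 (3-bit immediate, destination may
// differ from the source) or, when the destination is tied to a source, to
// the two-address tADDi8 (8-bit immediate); both forms require low registers,
// and the row is marked Special because an SP-relative add needs the extra
// tADDrSPi handling in ReduceSpecial().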
81 :
82 : static const ReduceEntry ReduceTable[] = {
83 : // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
84 : { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
85 : { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
86 : { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
87 : { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
88 : { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
89 : { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
90 : { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
91 : { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
92 : { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
93 : // FIXME: Disable CMN, as CCodes are backwards from compare expectations
94 : //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
95 : { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
96 : { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
97 : { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
98 : { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
99 : // FIXME: adr.n immediate offset must be multiple of 4.
100 : //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
101 : { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
102 : { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
103 : { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
104 : { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
105 : { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
106 : { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
107 : // FIXME: Do we need the 16-bit 'S' variant?
108 : { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
109 : { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
110 : { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
111 : { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
112 : { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
113 : { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
114 : { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
115 : { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
116 : { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
117 : { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
118 : { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
119 : { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
120 : { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
121 : { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
122 : { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
123 : { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
124 : { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
125 : { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
126 : { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
127 : { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
128 :
129 : // FIXME: Clean this up after splitting each Thumb load / store opcode
130 : // into multiple ones.
131 : { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
132 : { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
133 : { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
134 : { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
135 : { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
136 : { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
137 : { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
138 : { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
139 : { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
140 : { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
141 : { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
142 : { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
143 : { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
144 : { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
145 : { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
146 : { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
147 :
148 : { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
149 : { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
150 : { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
151 : // ARM::t2STMIA (with no base-reg writeback) has no Thumb1 equivalent.
152 : // Reducing to tSTMIA_UPD changes the semantics, so it can only be done if
153 : // the base register is killed. This difference is correctly handled elsewhere.
154 : { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
155 : { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
156 : { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
157 : };
158 :
159 : class Thumb2SizeReduce : public MachineFunctionPass {
160 : public:
161 : static char ID;
162 :
163 : const Thumb2InstrInfo *TII;
164 : const ARMSubtarget *STI;
165 :
166 : Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
167 :
168 : bool runOnMachineFunction(MachineFunction &MF) override;
169 :
170 5375 : MachineFunctionProperties getRequiredProperties() const override {
171 5375 : return MachineFunctionProperties().set(
172 5375 : MachineFunctionProperties::Property::NoVRegs);
173 : }
174 :
175 5373 : StringRef getPassName() const override {
176 5373 : return THUMB2_SIZE_REDUCE_NAME;
177 : }
178 :
179 : private:
180 : /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
181 : DenseMap<unsigned, unsigned> ReduceOpcodeMap;
182 :
183 : bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
184 :
185 : bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
186 : bool is2Addr, ARMCC::CondCodes Pred,
187 : bool LiveCPSR, bool &HasCC, bool &CCDead);
188 :
189 : bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
190 : const ReduceEntry &Entry);
191 :
192 : bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
193 : const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
194 :
195 : /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
196 : /// instruction.
197 : bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
198 : const ReduceEntry &Entry, bool LiveCPSR,
199 : bool IsSelfLoop);
200 :
201 : /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
202 : /// non-two-address instruction.
203 : bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
204 : const ReduceEntry &Entry, bool LiveCPSR,
205 : bool IsSelfLoop);
206 :
207 : /// ReduceMI - Attempt to reduce MI, return true on success.
208 : bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
209 : bool LiveCPSR, bool IsSelfLoop);
210 :
211 : /// ReduceMBB - Reduce width of instructions in the specified basic block.
212 : bool ReduceMBB(MachineBasicBlock &MBB);
213 :
214 : bool OptimizeSize;
215 : bool MinimizeSize;
216 :
217 : // Last instruction to define CPSR in the current block.
218 : MachineInstr *CPSRDef;
219 : // Was CPSR last defined by a high latency instruction?
220 : // When CPSRDef is null, this refers to CPSR defs in predecessors.
221 : bool HighLatencyCPSR;
222 :
223 : struct MBBInfo {
224 : // The flags leaving this block have high latency.
225 : bool HighLatencyCPSR = false;
226 : // Has this block been visited yet?
227 : bool Visited = false;
228 :
229 0 : MBBInfo() = default;
230 : };
231 :
232 : SmallVector<MBBInfo, 8> BlockInfo;
233 :
234 : std::function<bool(const Function &)> PredicateFtor;
235 : };
236 :
237 : char Thumb2SizeReduce::ID = 0;
238 :
239 : } // end anonymous namespace
240 :
241 199024 : INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
242 : false)
243 :
244 5399 : Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
245 5399 : : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
246 5399 : OptimizeSize = MinimizeSize = false;
247 334738 : for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
248 329339 : unsigned FromOpc = ReduceTable[i].WideOpc;
249 329339 : if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
250 0 : llvm_unreachable("Duplicated entries?");
251 : }
252 5399 : }
253 :
254 : static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
255 1091 : for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
256 1091 : if (*Regs == ARM::CPSR)
257 : return true;
258 : return false;
259 : }
260 :
261 : // Check for a likely high-latency flag def.
262 : static bool isHighLatencyCPSR(MachineInstr *Def) {
263 7310 : switch(Def->getOpcode()) {
264 : case ARM::FMSTAT:
265 : case ARM::tMUL:
266 : return true;
267 : }
268 : return false;
269 : }
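// FMSTAT (the VFP status transfer, vmrs APSR_nzcv in UAL syntax) and
// multiplies deliver their flag results late, so canAddPseudoFlagDep() below
// is more willing to keep a 32-bit encoding than to create a new false CPSR
// dependency on them.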
270 :
271 : /// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
272 : /// the 's' forms of 16-bit instructions partially update CPSR. Abort the
273 : /// transformation to avoid adding a false dependency on the last CPSR-setting
274 : /// instruction, which hurts the out-of-order execution engine's ability
275 : /// to do register renaming.
276 : /// This function checks whether there is a read-after-write dependency between
277 : /// the last instruction that defines the CPSR and the current instruction. If
278 : /// there is, no harm is done, since the instruction cannot be retired before
279 : /// the CPSR-setting instruction anyway.
280 : /// Note that we do not do full dependency analysis here, for the sake of
281 : /// compile time. We're not looking for cases like:
282 : /// r0 = muls ...
283 : /// r1 = add.w r0, ...
284 : /// ...
285 : /// = mul.w r1
286 : /// In this case it would have been OK to narrow the mul.w to muls, since
287 : /// there is an indirect RAW dependency between the muls and the mul.w.
288 : bool
289 1974 : Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
290 : // Disable the check for -Oz (aka OptimizeForSizeHarder).
291 1974 : if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
292 : return false;
293 :
294 92 : if (!CPSRDef)
295 : // If this BB loops back to itself, conservatively avoid narrowing the
296 : // first instruction that does partial flag update.
297 122 : return HighLatencyCPSR || FirstInSelfLoop;
298 :
299 24 : SmallSet<unsigned, 2> Defs;
300 155 : for (const MachineOperand &MO : CPSRDef->operands()) {
301 131 : if (!MO.isReg() || MO.isUndef() || MO.isUse())
302 108 : continue;
303 47 : unsigned Reg = MO.getReg();
304 47 : if (Reg == 0 || Reg == ARM::CPSR)
305 : continue;
306 23 : Defs.insert(Reg);
307 : }
308 :
309 129 : for (const MachineOperand &MO : Use->operands()) {
310 110 : if (!MO.isReg() || MO.isUndef() || MO.isDef())
311 59 : continue;
312 51 : unsigned Reg = MO.getReg();
313 51 : if (Defs.count(Reg))
314 5 : return false;
315 : }
316 :
317 : // If the current CPSR has high latency, try to avoid the false dependency.
318 19 : if (HighLatencyCPSR)
319 : return true;
320 :
321 : // tMOVi8 usually doesn't start long dependency chains, and there are a lot
322 : // of them, so always shrink them when CPSR doesn't have high latency.
323 34 : if (Use->getOpcode() == ARM::t2MOVi ||
324 : Use->getOpcode() == ARM::t2MOVi16)
325 13 : return false;
326 :
327 : // No read-after-write dependency. The narrowing will add false dependency.
328 : return true;
329 : }
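// A worked example of the check above (a sketch, assuming a core where
// avoidCPSRPartialUpdate() is true and we are not at -Oz): after
//   muls r0, r1, r0   ; high-latency CPSR def
// narrowing "lsl.w r3, r4, #2" to "lsls r3, r4, #2" would make the lsls'
// partial CPSR update depend on the muls' flags. Since r3/r4 do not read r0,
// no true register dependency orders the two instructions, so this returns
// true and the caller keeps the 32-bit encoding.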
330 :
331 : bool
332 5585 : Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
333 : bool is2Addr, ARMCC::CondCodes Pred,
334 : bool LiveCPSR, bool &HasCC, bool &CCDead) {
335 5585 : if ((is2Addr && Entry.PredCC2 == 0) ||
336 4483 : (!is2Addr && Entry.PredCC1 == 0)) {
337 3451 : if (Pred == ARMCC::AL) {
338 : // Not predicated, must set CPSR.
339 2837 : if (!HasCC) {
340 : // Original instruction was not setting CPSR, but CPSR is not
341 : // currently live anyway. It's ok to set it. The CPSR def is
342 : // dead though.
343 2616 : if (!LiveCPSR) {
344 2458 : HasCC = true;
345 2458 : CCDead = true;
346 2458 : return true;
347 : }
348 : return false;
349 : }
350 : } else {
351 : // Predicated, must not set CPSR.
352 614 : if (HasCC)
353 0 : return false;
354 : }
355 2134 : } else if ((is2Addr && Entry.PredCC2 == 2) ||
356 1625 : (!is2Addr && Entry.PredCC1 == 2)) {
357 : // Old opcode has an optional def of CPSR.
358 1091 : if (HasCC)
359 : return true;
360 : // If the old opcode does not implicitly define CPSR, then it's not OK,
361 : // since the new opcode's CPSR def is not meant to be thrown away, e.g. CMP.
362 2182 : if (!HasImplicitCPSRDef(MI->getDesc()))
363 : return false;
364 1091 : HasCC = true;
365 : } else {
366 : // 16-bit instruction does not set CPSR.
367 1043 : if (HasCC)
368 38 : return false;
369 : }
370 :
371 : return true;
372 : }
373 :
374 9643 : static bool VerifyLowRegs(MachineInstr *MI) {
375 9643 : unsigned Opc = MI->getOpcode();
376 9643 : bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
377 9643 : bool isLROk = (Opc == ARM::t2STMDB_UPD);
378 9643 : bool isSPOk = isPCOk || isLROk;
379 55682 : for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
380 47728 : const MachineOperand &MO = MI->getOperand(i);
381 47728 : if (!MO.isReg() || MO.isImplicit())
382 : continue;
383 32561 : unsigned Reg = MO.getReg();
384 32561 : if (Reg == 0 || Reg == ARM::CPSR)
385 : continue;
386 24160 : if (isPCOk && Reg == ARM::PC)
387 : continue;
388 23209 : if (isLROk && Reg == ARM::LR)
389 : continue;
390 22236 : if (Reg == ARM::SP) {
391 6909 : if (isSPOk)
392 : continue;
393 2271 : if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
394 : // Special case for these ldr / str with sp as base register.
395 : continue;
396 : }
397 : if (!isARMLowRegister(Reg))
398 : return false;
399 : }
400 : return true;
401 : }
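// Note: isARMLowRegister() accepts only r0-r7, the registers most 16-bit
// encodings can address. The PC/LR/SP exceptions above correspond to the
// load/store-multiple forms (pop into pc, push of lr, sp-based ldr/str) that
// encode those registers specially.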
402 :
403 : bool
404 0 : Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
405 : const ReduceEntry &Entry) {
406 0 : if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
407 0 : return false;
408 :
409 : unsigned Scale = 1;
410 : bool HasImmOffset = false;
411 : bool HasShift = false;
412 : bool HasOffReg = true;
413 : bool isLdStMul = false;
414 0 : unsigned Opc = Entry.NarrowOpc1;
415 : unsigned OpNum = 3; // First 'rest' of operands.
416 0 : uint8_t ImmLimit = Entry.Imm1Limit;
417 :
418 0 : switch (Entry.WideOpc) {
419 0 : default:
420 0 : llvm_unreachable("Unexpected Thumb2 load / store opcode!");
421 0 : case ARM::t2LDRi12:
422 : case ARM::t2STRi12:
423 0 : if (MI->getOperand(1).getReg() == ARM::SP) {
424 0 : Opc = Entry.NarrowOpc2;
425 0 : ImmLimit = Entry.Imm2Limit;
426 : }
427 :
428 : Scale = 4;
429 : HasImmOffset = true;
430 : HasOffReg = false;
431 : break;
432 : case ARM::t2LDRBi12:
433 : case ARM::t2STRBi12:
434 : HasImmOffset = true;
435 : HasOffReg = false;
436 : break;
437 0 : case ARM::t2LDRHi12:
438 : case ARM::t2STRHi12:
439 : Scale = 2;
440 : HasImmOffset = true;
441 : HasOffReg = false;
442 0 : break;
443 0 : case ARM::t2LDRs:
444 : case ARM::t2LDRBs:
445 : case ARM::t2LDRHs:
446 : case ARM::t2LDRSBs:
447 : case ARM::t2LDRSHs:
448 : case ARM::t2STRs:
449 : case ARM::t2STRBs:
450 : case ARM::t2STRHs:
451 : HasShift = true;
452 : OpNum = 4;
453 0 : break;
454 0 : case ARM::t2LDR_POST:
455 : case ARM::t2STR_POST: {
456 0 : if (!MBB.getParent()->getFunction().optForMinSize())
457 0 : return false;
458 :
459 0 : if (!MI->hasOneMemOperand() ||
460 0 : (*MI->memoperands_begin())->getAlignment() < 4)
461 0 : return false;
462 :
463 : // We're creating a completely different type of load/store - LDM from LDR.
464 : // For this reason we can't reuse the logic at the end of this function; we
465 : // have to implement the MI building here.
466 0 : bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
467 0 : unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
468 0 : unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
469 0 : unsigned Offset = MI->getOperand(3).getImm();
470 0 : unsigned PredImm = MI->getOperand(4).getImm();
471 0 : unsigned PredReg = MI->getOperand(5).getReg();
472 : assert(isARMLowRegister(Rt));
473 : assert(isARMLowRegister(Rn));
474 :
475 0 : if (Offset != 4)
476 0 : return false;
477 :
478 : // Add the 16-bit load / store instruction.
479 : DebugLoc dl = MI->getDebugLoc();
480 0 : auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
481 0 : .addReg(Rn, RegState::Define)
482 0 : .addReg(Rn)
483 : .addImm(PredImm)
484 0 : .addReg(PredReg)
485 0 : .addReg(Rt, IsStore ? 0 : RegState::Define);
486 :
487 : // Transfer memoperands.
488 0 : MIB.setMemRefs(MI->memoperands());
489 :
490 : // Transfer MI flags.
491 0 : MIB.setMIFlags(MI->getFlags());
492 :
493 : // Kill the old instruction.
494 0 : MI->eraseFromBundle();
495 : ++NumLdSts;
496 : return true;
497 : }
498 0 : case ARM::t2LDMIA: {
499 0 : unsigned BaseReg = MI->getOperand(0).getReg();
500 : assert(isARMLowRegister(BaseReg));
501 :
502 : // For the non-writeback version (this one), the base register must be
503 : // one of the registers being loaded.
504 : bool isOK = false;
505 0 : for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
506 0 : if (MI->getOperand(i).getReg() == BaseReg) {
507 : isOK = true;
508 : break;
509 : }
510 : }
511 :
512 0 : if (!isOK)
513 0 : return false;
514 :
515 : OpNum = 0;
516 : isLdStMul = true;
517 : break;
518 : }
519 0 : case ARM::t2STMIA:
520 : // If the base register is killed, we don't care what its value is after the
521 : // instruction, so we can use an updating STMIA.
522 0 : if (!MI->getOperand(0).isKill())
523 0 : return false;
524 :
525 : break;
526 0 : case ARM::t2LDMIA_RET: {
527 0 : unsigned BaseReg = MI->getOperand(1).getReg();
528 0 : if (BaseReg != ARM::SP)
529 0 : return false;
530 0 : Opc = Entry.NarrowOpc2; // tPOP_RET
531 : OpNum = 2;
532 : isLdStMul = true;
533 0 : break;
534 : }
535 0 : case ARM::t2LDMIA_UPD:
536 : case ARM::t2STMIA_UPD:
537 : case ARM::t2STMDB_UPD: {
538 : OpNum = 0;
539 :
540 0 : unsigned BaseReg = MI->getOperand(1).getReg();
541 0 : if (BaseReg == ARM::SP &&
542 0 : (Entry.WideOpc == ARM::t2LDMIA_UPD ||
543 : Entry.WideOpc == ARM::t2STMDB_UPD)) {
544 0 : Opc = Entry.NarrowOpc2; // tPOP or tPUSH
545 0 : OpNum = 2;
546 0 : } else if (!isARMLowRegister(BaseReg) ||
547 0 : (Entry.WideOpc != ARM::t2LDMIA_UPD &&
548 : Entry.WideOpc != ARM::t2STMIA_UPD)) {
549 0 : return false;
550 : }
551 :
552 : isLdStMul = true;
553 : break;
554 : }
555 : }
556 :
557 : unsigned OffsetReg = 0;
558 : bool OffsetKill = false;
559 : bool OffsetInternal = false;
560 0 : if (HasShift) {
561 0 : OffsetReg = MI->getOperand(2).getReg();
562 : OffsetKill = MI->getOperand(2).isKill();
563 : OffsetInternal = MI->getOperand(2).isInternalRead();
564 :
565 0 : if (MI->getOperand(3).getImm())
566 : // Thumb1 addressing mode doesn't support shift.
567 0 : return false;
568 : }
569 :
570 : unsigned OffsetImm = 0;
571 0 : if (HasImmOffset) {
572 0 : OffsetImm = MI->getOperand(2).getImm();
573 0 : unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
574 :
575 0 : if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
576 : // Make sure the immediate field fits.
577 0 : return false;
578 : }
579 :
580 : // Add the 16-bit load / store instruction.
581 : DebugLoc dl = MI->getDebugLoc();
582 0 : MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
583 :
584 : // tSTMIA_UPD takes a defining register operand. We've already checked that
585 : // the register is killed, so mark it as dead here.
586 0 : if (Entry.WideOpc == ARM::t2STMIA)
587 0 : MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
588 :
589 0 : if (!isLdStMul) {
590 0 : MIB.add(MI->getOperand(0));
591 0 : MIB.add(MI->getOperand(1));
592 :
593 0 : if (HasImmOffset)
594 0 : MIB.addImm(OffsetImm / Scale);
595 :
596 : assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
597 :
598 0 : if (HasOffReg)
599 : MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
600 0 : getInternalReadRegState(OffsetInternal));
601 : }
602 :
603 : // Transfer the rest of operands.
604 0 : for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
605 0 : MIB.add(MI->getOperand(OpNum));
606 :
607 : // Transfer memoperands.
608 0 : MIB.setMemRefs(MI->memoperands());
609 :
610 : // Transfer MI flags.
611 0 : MIB.setMIFlags(MI->getFlags());
612 :
613 : LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
614 : << " to 16-bit: " << *MIB);
615 :
616 : MBB.erase_instr(MI);
617 : ++NumLdSts;
618 : return true;
619 : }
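// Example of the common immediate-offset path above (a sketch):
// "ldr.w r0, [r1, #8]" (t2LDRi12) fits tLDRi's 5-bit, 4-scaled immediate
// field (offset a multiple of 4, at most 31 * 4 = 124), so it becomes the
// 16-bit "ldr r0, [r1, #8]". With SP as the base, NarrowOpc2 (tLDRspi) and
// its 8-bit immediate (max 255 * 4 = 1020) are used instead.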
620 :
621 : bool
622 12419 : Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
623 : const ReduceEntry &Entry,
624 : bool LiveCPSR, bool IsSelfLoop) {
625 12419 : unsigned Opc = MI->getOpcode();
626 12419 : if (Opc == ARM::t2ADDri) {
627 : // If the source register is SP, try to reduce to tADDrSPi, otherwise
628 : // it's a normal reduce.
629 2518 : if (MI->getOperand(1).getReg() != ARM::SP) {
630 832 : if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
631 : return true;
632 622 : return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
633 : }
634 : // Try to reduce to tADDrSPi.
635 1686 : unsigned Imm = MI->getOperand(2).getImm();
636 : // The immediate must be in range, the destination register must be a low
637 : // reg, the predicate must be "always" and the condition flags must not
638 : // be being set.
639 1686 : if (Imm & 3 || Imm > 1020)
640 : return false;
641 852 : if (!isARMLowRegister(MI->getOperand(0).getReg()))
642 : return false;
643 655 : if (MI->getOperand(3).getImm() != ARMCC::AL)
644 : return false;
645 : const MCInstrDesc &MCID = MI->getDesc();
646 1310 : if (MCID.hasOptionalDef() &&
647 1965 : MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
648 : return false;
649 :
650 : MachineInstrBuilder MIB =
651 : BuildMI(MBB, MI, MI->getDebugLoc(),
652 654 : TII->get(ARM::tADDrSPi))
653 654 : .add(MI->getOperand(0))
654 654 : .add(MI->getOperand(1))
655 654 : .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
656 654 : .add(predOps(ARMCC::AL));
657 :
658 : // Transfer MI flags.
659 654 : MIB.setMIFlags(MI->getFlags());
660 :
661 : LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
662 : << " to 16-bit: " << *MIB);
663 :
664 : MBB.erase_instr(MI);
665 : ++NumNarrows;
666 654 : return true;
667 : }
668 :
669 9901 : if (Entry.LowRegs1 && !VerifyLowRegs(MI))
670 : return false;
671 :
672 8212 : if (MI->mayLoadOrStore())
673 6542 : return ReduceLoadStore(MBB, MI, Entry);
674 :
675 1670 : switch (Opc) {
676 : default: break;
677 0 : case ARM::t2ADDSri:
678 : case ARM::t2ADDSrr: {
679 0 : unsigned PredReg = 0;
680 0 : if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
681 : switch (Opc) {
682 : default: break;
683 0 : case ARM::t2ADDSri:
684 0 : if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
685 0 : return true;
686 : LLVM_FALLTHROUGH;
687 : case ARM::t2ADDSrr:
688 0 : return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
689 : }
690 : }
691 0 : break;
692 : }
693 447 : case ARM::t2RSBri:
694 : case ARM::t2RSBSri:
695 : case ARM::t2SXTB:
696 : case ARM::t2SXTH:
697 : case ARM::t2UXTB:
698 : case ARM::t2UXTH:
699 447 : if (MI->getOperand(2).getImm() == 0)
700 427 : return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
701 : break;
702 965 : case ARM::t2MOVi16:
703 : // Can convert only 'pure' immediate operands, not immediates obtained as
704 : // globals' addresses.
705 1930 : if (MI->getOperand(1).isImm())
706 620 : return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
707 : break;
708 258 : case ARM::t2CMPrr: {
709 : // Try to reduce to the lo-reg only version first. Why there are two
710 : // versions of the instruction is a mystery.
711 : // It would be nice to just have two entries in the master table that
712 : // are prioritized, but the table assumes a unique entry for each
713 : // source insn opcode. So for now, we hack a local entry record to use.
714 : static const ReduceEntry NarrowEntry =
715 : { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
716 258 : if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
717 : return true;
718 26 : return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
719 : }
720 : }
721 : return false;
722 : }
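// For instance (a sketch of the tADDrSPi path above): "add.w r0, sp, #16"
// qualifies because the immediate is a multiple of 4 and at most 1020, r0 is
// a low register, the instruction is unpredicated, and CPSR is untouched; it
// is emitted as the 16-bit "add r0, sp, #16", whose encoding stores the
// scaled value 16 / 4 = 4.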
723 :
724 : bool
725 2340 : Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
726 : const ReduceEntry &Entry,
727 : bool LiveCPSR, bool IsSelfLoop) {
728 2340 : if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
729 : return false;
730 :
731 2334 : if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
732 : // Don't issue movs with shifter operand for some CPUs unless we
733 : // are optimizing for size.
734 : return false;
735 :
736 2331 : unsigned Reg0 = MI->getOperand(0).getReg();
737 2331 : unsigned Reg1 = MI->getOperand(1).getReg();
738 : // t2MUL is "special". The tied source operand is second, not first.
739 4662 : if (MI->getOpcode() == ARM::t2MUL) {
740 96 : unsigned Reg2 = MI->getOperand(2).getReg();
741 : // Early exit if the regs aren't all low regs.
742 : if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
743 : || !isARMLowRegister(Reg2))
744 : return false;
745 90 : if (Reg0 != Reg2) {
746 : // If the other operand also isn't the same as the destination, we
747 : // can't reduce.
748 42 : if (Reg1 != Reg0)
749 : return false;
750 : // Try to commute the operands to make it a 2-address instruction.
751 37 : MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
752 37 : if (!CommutedMI)
753 : return false;
754 : }
755 2235 : } else if (Reg0 != Reg1) {
756 : // Try to commute the operands to make it a 2-address instruction.
757 1301 : unsigned CommOpIdx1 = 1;
758 1301 : unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
759 1301 : if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
760 1046 : MI->getOperand(CommOpIdx2).getReg() != Reg0)
761 984 : return false;
762 : MachineInstr *CommutedMI =
763 317 : TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
764 317 : if (!CommutedMI)
765 : return false;
766 : }
767 1336 : if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
768 : return false;
769 1193 : if (Entry.Imm2Limit) {
770 392 : unsigned Imm = MI->getOperand(2).getImm();
771 392 : unsigned Limit = (1 << Entry.Imm2Limit) - 1;
772 392 : if (Imm > Limit)
773 : return false;
774 : } else {
775 801 : unsigned Reg2 = MI->getOperand(2).getReg();
776 801 : if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
777 : return false;
778 : }
779 :
780 : // Check if it's possible / necessary to transfer the predicate.
781 1102 : const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
782 1102 : unsigned PredReg = 0;
783 1102 : ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
784 : bool SkipPred = false;
785 1102 : if (Pred != ARMCC::AL) {
786 68 : if (!NewMCID.isPredicable())
787 : // Can't transfer predicate, fail.
788 : return false;
789 : } else {
790 2136 : SkipPred = !NewMCID.isPredicable();
791 : }
792 :
793 1102 : bool HasCC = false;
794 1102 : bool CCDead = false;
795 1102 : const MCInstrDesc &MCID = MI->getDesc();
796 2204 : if (MCID.hasOptionalDef()) {
797 1017 : unsigned NumOps = MCID.getNumOperands();
798 1017 : HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
799 1017 : if (HasCC && MI->getOperand(NumOps-1).isDead())
800 0 : CCDead = true;
801 : }
802 1102 : if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
803 : return false;
804 :
805 : // Avoid adding a false dependency on partial flag update by some 16-bit
806 : // instructions that have the 's' bit set.
807 1268 : if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
808 222 : canAddPseudoFlagDep(MI, IsSelfLoop))
809 : return false;
810 :
811 : // Add the 16-bit instruction.
812 : DebugLoc dl = MI->getDebugLoc();
813 : MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
814 1041 : MIB.add(MI->getOperand(0));
815 2082 : if (NewMCID.hasOptionalDef())
816 1140 : MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
817 :
818 : // Transfer the rest of operands.
819 1041 : unsigned NumOps = MCID.getNumOperands();
820 6265 : for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
821 5224 : if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
822 : continue;
823 4263 : if (SkipPred && MCID.OpInfo[i].isPredicate())
824 : continue;
825 4263 : MIB.add(MI->getOperand(i));
826 : }
827 :
828 : // Transfer MI flags.
829 1041 : MIB.setMIFlags(MI->getFlags());
830 :
831 : LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
832 : << " to 16-bit: " << *MIB);
833 :
834 : MBB.erase_instr(MI);
835 : ++Num2Addrs;
836 : return true;
837 : }
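// A sketch of the commutation above: "and.w r0, r1, r0" is not two-address as
// written, but swapping the sources gives "and.w r0, r0, r1", which ties the
// destination to the first source and can then narrow to the 16-bit
// "ands r0, r1" (the mandatory CPSR def is marked dead when the flags are not
// otherwise live, per VerifyPredAndCC()).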
838 :
839 : bool
840 6321 : Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
841 : const ReduceEntry &Entry,
842 : bool LiveCPSR, bool IsSelfLoop) {
843 6321 : if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
844 : return false;
845 :
846 6316 : if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
847 : // Don't issue movs with shifter operand for some CPUs unless we
848 : // are optimizing for size.
849 : return false;
850 :
851 : unsigned Limit = ~0U;
852 6307 : if (Entry.Imm1Limit)
853 5152 : Limit = (1 << Entry.Imm1Limit) - 1;
854 :
855 6307 : const MCInstrDesc &MCID = MI->getDesc();
856 29657 : for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
857 25174 : if (MCID.OpInfo[i].isPredicate())
858 : continue;
859 16208 : const MachineOperand &MO = MI->getOperand(i);
860 16208 : if (MO.isReg()) {
861 11282 : unsigned Reg = MO.getReg();
862 11282 : if (!Reg || Reg == ARM::CPSR)
863 : continue;
864 8420 : if (Entry.LowRegs1 && !isARMLowRegister(Reg))
865 : return false;
866 4926 : } else if (MO.isImm() &&
867 : !MCID.OpInfo[i].isPredicate()) {
868 4926 : if (((unsigned)MO.getImm()) > Limit)
869 : return false;
870 : }
871 : }
872 :
873 : // Check if it's possible / necessary to transfer the predicate.
874 4483 : const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
875 4483 : unsigned PredReg = 0;
876 4483 : ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
877 : bool SkipPred = false;
878 4483 : if (Pred != ARMCC::AL) {
879 1476 : if (!NewMCID.isPredicable())
880 : // Can't transfer predicate, fail.
881 : return false;
882 : } else {
883 7490 : SkipPred = !NewMCID.isPredicable();
884 : }
885 :
886 4483 : bool HasCC = false;
887 4483 : bool CCDead = false;
888 8966 : if (MCID.hasOptionalDef()) {
889 2862 : unsigned NumOps = MCID.getNumOperands();
890 2862 : HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
891 2862 : if (HasCC && MI->getOperand(NumOps-1).isDead())
892 0 : CCDead = true;
893 : }
894 4483 : if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
895 : return false;
896 :
897 : // Avoid adding a false dependency on partial flag update by some 16-bit
898 : // instructions that have the 's' bit set.
899 6095 : if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
900 1752 : canAddPseudoFlagDep(MI, IsSelfLoop))
901 : return false;
902 :
903 : // Add the 16-bit instruction.
904 : DebugLoc dl = MI->getDebugLoc();
905 : MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
906 4328 : MIB.add(MI->getOperand(0));
907 8656 : if (NewMCID.hasOptionalDef())
908 5406 : MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
909 :
910 : // Transfer the rest of operands.
911 4328 : unsigned NumOps = MCID.getNumOperands();
912 23516 : for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
913 19188 : if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
914 : continue;
915 16469 : if ((MCID.getOpcode() == ARM::t2RSBSri ||
916 16363 : MCID.getOpcode() == ARM::t2RSBri ||
917 16125 : MCID.getOpcode() == ARM::t2SXTB ||
918 15889 : MCID.getOpcode() == ARM::t2SXTH ||
919 15501 : MCID.getOpcode() == ARM::t2UXTB ||
920 18185 : MCID.getOpcode() == ARM::t2UXTH) && i == 2)
921 : // Skip the zero immediate operand, it's now implicit.
922 : continue;
923 16042 : bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
924 16042 : if (SkipPred && isPred)
925 : continue;
926 16042 : const MachineOperand &MO = MI->getOperand(i);
927 16042 : if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
928 : // Skip implicit def of CPSR. Either it's modeled as an optional
929 : // def now or it's already an implicit def on the new instruction.
930 : continue;
931 : MIB.add(MO);
932 : }
933 8656 : if (!MCID.isPredicable() && NewMCID.isPredicable())
934 0 : MIB.add(predOps(ARMCC::AL));
935 :
936 : // Transfer MI flags.
937 4328 : MIB.setMIFlags(MI->getFlags());
938 :
939 : LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
940 : << " to 16-bit: " << *MIB);
941 :
942 : MBB.erase_instr(MI);
943 : ++NumNarrows;
944 : return true;
945 : }
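// A sketch of a full-width reduction: "mov.w r0, #42" (t2MOVi) satisfies
// tMOVi8's low-register and 8-bit immediate constraints, so it becomes
// "movs r0, #42"; per VerifyPredAndCC(), the flag def this adds is only
// acceptable (and marked dead) when CPSR is not live at that point.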
946 :
947 152223 : static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
948 : bool HasDef = false;
949 853884 : for (const MachineOperand &MO : MI.operands()) {
950 701661 : if (!MO.isReg() || MO.isUndef() || MO.isUse())
951 : continue;
952 145028 : if (MO.getReg() != ARM::CPSR)
953 : continue;
954 :
955 7360 : DefCPSR = true;
956 7360 : if (!MO.isDead())
957 : HasDef = true;
958 : }
959 :
960 152223 : return HasDef || LiveCPSR;
961 : }
962 :
963 152223 : static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
964 848591 : for (const MachineOperand &MO : MI.operands()) {
965 700707 : if (!MO.isReg() || MO.isUndef() || MO.isDef())
966 : continue;
967 323469 : if (MO.getReg() != ARM::CPSR)
968 : continue;
969 : assert(LiveCPSR && "CPSR liveness tracking is wrong!");
970 5031 : if (MO.isKill()) {
971 : LiveCPSR = false;
972 : break;
973 : }
974 : }
975 :
976 152223 : return LiveCPSR;
977 : }
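// Together, UpdateCPSRDef() and UpdateCPSRUse() maintain a simple forward
// liveness bit for CPSR while ReduceMBB() walks a block: a non-dead def turns
// it on, a kill turns it off, and the reducers consult it before letting a
// narrowed opcode clobber the flags.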
978 :
979 152223 : bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
980 : bool LiveCPSR, bool IsSelfLoop) {
981 152223 : unsigned Opcode = MI->getOpcode();
982 152223 : DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
983 152223 : if (OPI == ReduceOpcodeMap.end())
984 : return false;
985 17850 : const ReduceEntry &Entry = ReduceTable[OPI->second];
986 :
987 : // Don't attempt normal reductions on "special" cases for now.
988 17850 : if (Entry.Special)
989 12419 : return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
990 :
991 : // Try to transform to a 16-bit two-address instruction.
992 6939 : if (Entry.NarrowOpc2 &&
993 1508 : ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
994 : return true;
995 :
996 : // Try to transform to a 16-bit non-two-address instruction.
997 8968 : if (Entry.NarrowOpc1 &&
998 4368 : ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
999 3448 : return true;
1000 :
1001 : return false;
1002 : }
1003 :
1004 18766 : bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
1005 : bool Modified = false;
1006 :
1007 : // Yes, CPSR could be live-in.
1008 18766 : bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
1009 : MachineInstr *BundleMI = nullptr;
1010 :
1011 18766 : CPSRDef = nullptr;
1012 18766 : HighLatencyCPSR = false;
1013 :
1014 : // Check predecessors for the latest CPSRDef.
1015 24723 : for (auto *Pred : MBB.predecessors()) {
1016 6063 : const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
1017 6063 : if (!PInfo.Visited) {
1018 : // Since blocks are visited in RPO, this must be a back-edge.
1019 : continue;
1020 : }
1021 4968 : if (PInfo.HighLatencyCPSR) {
1022 106 : HighLatencyCPSR = true;
1023 106 : break;
1024 : }
1025 : }
1026 :
1027 : // If this BB loops back to itself, conservatively avoid narrowing the
1028 : // first instruction that does partial flag update.
1029 18766 : bool IsSelfLoop = MBB.isSuccessor(&MBB);
1030 : MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
1031 : MachineBasicBlock::instr_iterator NextMII;
1032 172212 : for (; MII != E; MII = NextMII) {
1033 : NextMII = std::next(MII);
1034 :
1035 : MachineInstr *MI = &*MII;
1036 153446 : if (MI->isBundle()) {
1037 : BundleMI = MI;
1038 1223 : continue;
1039 : }
1040 : if (MI->isDebugInstr())
1041 : continue;
1042 :
1043 152223 : LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
1044 :
1045 : // Does NextMII belong to the same bundle as MI?
1046 152223 : bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
1047 :
1048 152223 : if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
1049 : Modified = true;
1050 : MachineBasicBlock::instr_iterator I = std::prev(NextMII);
1051 : MI = &*I;
1052 : // Removing and reinserting the first instruction in a bundle will break
1053 : // up the bundle. Fix the bundling if it was broken.
1054 11588 : if (NextInSameBundle && !NextMII->isBundledWithPred())
1055 0 : NextMII->bundleWithPred();
1056 : }
1057 :
1058 152223 : if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
1059 : // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
1060 : // marker is only on the BUNDLE instruction. Process the BUNDLE
1061 : // instruction as we finish with the bundled instruction to work around
1062 : // the inconsistency.
1063 1071 : if (BundleMI->killsRegister(ARM::CPSR))
1064 : LiveCPSR = false;
1065 : MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
1066 40 : if (MO && !MO->isDead())
1067 : LiveCPSR = true;
1068 : MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
1069 1006 : if (MO && !MO->isKill())
1070 : LiveCPSR = true;
1071 : }
1072 :
1073 152223 : bool DefCPSR = false;
1074 152223 : LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
1075 152223 : if (MI->isCall()) {
1076 : // Calls don't really set CPSR.
1077 7532 : CPSRDef = nullptr;
1078 7532 : HighLatencyCPSR = false;
1079 : IsSelfLoop = false;
1080 144691 : } else if (DefCPSR) {
1081 : // This is the last CPSR defining instruction.
1082 7310 : CPSRDef = MI;
1083 7310 : HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
1084 : IsSelfLoop = false;
1085 : }
1086 : }
1087 :
1088 18766 : MBBInfo &Info = BlockInfo[MBB.getNumber()];
1089 18766 : Info.HighLatencyCPSR = HighLatencyCPSR;
1090 18766 : Info.Visited = true;
1091 18766 : return Modified;
1092 : }
1093 :
1094 27966 : bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
1095 27966 : if (PredicateFtor && !PredicateFtor(MF.getFunction()))
1096 : return false;
1097 :
1098 15691 : STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
1099 20972 : if (STI->isThumb1Only() || STI->prefers32BitThumb())
1100 : return false;
1101 :
1102 14520 : TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
1103 :
1104 : // Optimizing / minimizing size? Minimizing size implies optimizing for size.
1105 14520 : OptimizeSize = MF.getFunction().optForSize();
1106 14520 : MinimizeSize = MF.getFunction().optForMinSize();
1107 :
1108 14520 : BlockInfo.clear();
1109 14520 : BlockInfo.resize(MF.getNumBlockIDs());
1110 :
1111 : // Visit blocks in reverse post-order so CPSRDef is known for all
1112 : // predecessors.
1113 : ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
1114 : bool Modified = false;
1115 : for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
1116 33286 : I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
1117 18766 : Modified |= ReduceMBB(**I);
1118 : return Modified;
1119 : }
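// Visiting in reverse post-order means every forward-edge predecessor of a
// block is reduced first, so the per-block HighLatencyCPSR summaries read at
// the top of ReduceMBB() are already final; back edges are recognized there
// via the Visited flag and skipped conservatively.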
1120 :
1121 : /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1122 : /// reduction pass.
1123 5398 : FunctionPass *llvm::createThumb2SizeReductionPass(
1124 : std::function<bool(const Function &)> Ftor) {
1125 5398 : return new Thumb2SizeReduce(std::move(Ftor));
1126 : }
|