File: | lib/Target/ARM/Thumb2SizeReduction.cpp |
Location: | line 387, column 7 |
Description: | Value stored to 'HasOffReg' is never read |
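The finding is a dead store: the listing below stores false to 'HasOffReg' at line 387, inside the branch taken when operand 1 is ARM::SP, and then stores the same value again unconditionally at line 392 before anything reads it. The following self-contained sketch shows only that pattern; the helper name 'sketchDeadStore' and the 'BaseIsSP' parameter are illustrative stand-ins that do not appear in the file.

static void sketchDeadStore(bool BaseIsSP) {
  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasOffReg = true;       // initial value, as in ReduceLoadStore
  if (BaseIsSP) {
    // narrow opcode / immediate-limit selection elided
    HasOffReg = false;         // line 387: this value is never read ...
  }
  Scale = 4;
  HasImmOffset = true;
  HasOffReg = false;           // ... because line 392 overwrites it unconditionally
  (void)Scale; (void)HasImmOffset; (void)HasOffReg;
}

Since both paths end with 'HasOffReg' equal to false, dropping the assignment inside the if would silence the warning without changing behaviour; whether to do so is left to the file's maintainers.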
1 | //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #include "ARM.h" |
11 | #include "ARMBaseInstrInfo.h" |
12 | #include "ARMSubtarget.h" |
13 | #include "MCTargetDesc/ARMAddressingModes.h" |
14 | #include "Thumb2InstrInfo.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/PostOrderIterator.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | #include "llvm/CodeGen/MachineInstr.h" |
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
21 | #include "llvm/IR/Function.h" // To access Function attributes |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Support/Debug.h" |
24 | #include "llvm/Target/TargetMachine.h" |
25 | using namespace llvm; |
26 | |
27 | #define DEBUG_TYPE "t2-reduce-size" |
28 | |
29 | STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); |
30 | STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); |
31 | STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); |
32 | |
33 | static cl::opt<int> ReduceLimit("t2-reduce-limit", |
34 | cl::init(-1), cl::Hidden); |
35 | static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", |
36 | cl::init(-1), cl::Hidden); |
37 | static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", |
38 | cl::init(-1), cl::Hidden); |
39 | |
40 | namespace { |
41 | /// ReduceTable - A static table with information on mapping from wide |
42 | /// opcodes to narrow ones. |
43 | struct ReduceEntry { |
44 | uint16_t WideOpc; // Wide opcode |
45 | uint16_t NarrowOpc1; // Narrow opcode to transform to |
46 | uint16_t NarrowOpc2; // Narrow opcode when it's two-address |
47 | uint8_t Imm1Limit; // Limit of immediate field (bits) |
48 | uint8_t Imm2Limit; // Limit of immediate field when it's two-address |
49 | unsigned LowRegs1 : 1; // Only possible if low-registers are used |
50 | unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) |
51 | unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. |
52 | // 1 - No cc field. |
53 | // 2 - Always set CPSR. |
54 | unsigned PredCC2 : 2; |
55 | unsigned PartFlag : 1; // 16-bit instruction does partial flag update |
56 | unsigned Special : 1; // Needs to be dealt with specially |
57 | unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) |
58 | }; |
59 | |
60 | static const ReduceEntry ReduceTable[] = { |
61 | // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM |
62 | { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, |
63 | { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, |
64 | { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, |
65 | { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, |
66 | { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, |
67 | { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, |
68 | { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, |
69 | { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, |
70 | { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, |
71 | //FIXME: Disable CMN, as CCodes are backwards from compare expectations |
72 | //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, |
73 | { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, |
74 | { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, |
75 | { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, |
76 | { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, |
77 | // FIXME: adr.n immediate offset must be multiple of 4. |
78 | //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, |
79 | { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, |
80 | { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, |
81 | { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, |
82 | { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, |
83 | { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 }, |
84 | { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 }, |
85 | // FIXME: Do we need the 16-bit 'S' variant? |
86 | { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, |
87 | { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, |
88 | { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, |
89 | { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, |
90 | { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, |
91 | { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, |
92 | { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, |
93 | { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, |
94 | { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
95 | { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, |
96 | { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, |
97 | { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, |
98 | { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, |
99 | { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, |
100 | { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, |
101 | { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, |
102 | { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, |
103 | { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, |
104 | { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, |
105 | { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, |
106 | |
107 | // FIXME: Clean this up after splitting each Thumb load / store opcode |
108 | // into multiple ones. |
109 | { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, |
110 | { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
111 | { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, |
112 | { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
113 | { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, |
114 | { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
115 | { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
116 | { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
117 | { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, |
118 | { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
119 | { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, |
120 | { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
121 | { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, |
122 | { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, |
123 | |
124 | { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, |
125 | { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, |
126 | { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, |
127 | // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent |
128 | { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, |
129 | { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } |
130 | }; |
131 | |
132 | class Thumb2SizeReduce : public MachineFunctionPass { |
133 | public: |
134 | static char ID; |
135 | Thumb2SizeReduce(); |
136 | |
137 | const Thumb2InstrInfo *TII; |
138 | const ARMSubtarget *STI; |
139 | |
140 | bool runOnMachineFunction(MachineFunction &MF) override; |
141 | |
142 | const char *getPassName() const override { |
143 | return "Thumb2 instruction size reduction pass"; |
144 | } |
145 | |
146 | private: |
147 | /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. |
148 | DenseMap<unsigned, unsigned> ReduceOpcodeMap; |
149 | |
150 | bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop); |
151 | |
152 | bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, |
153 | bool is2Addr, ARMCC::CondCodes Pred, |
154 | bool LiveCPSR, bool &HasCC, bool &CCDead); |
155 | |
156 | bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, |
157 | const ReduceEntry &Entry); |
158 | |
159 | bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, |
160 | const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop); |
161 | |
162 | /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address |
163 | /// instruction. |
164 | bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, |
165 | const ReduceEntry &Entry, bool LiveCPSR, |
166 | bool IsSelfLoop); |
167 | |
168 | /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit |
169 | /// non-two-address instruction. |
170 | bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, |
171 | const ReduceEntry &Entry, bool LiveCPSR, |
172 | bool IsSelfLoop); |
173 | |
174 | /// ReduceMI - Attempt to reduce MI, return true on success. |
175 | bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, |
176 | bool LiveCPSR, bool IsSelfLoop); |
177 | |
178 | /// ReduceMBB - Reduce width of instructions in the specified basic block. |
179 | bool ReduceMBB(MachineBasicBlock &MBB); |
180 | |
181 | bool OptimizeSize; |
182 | bool MinimizeSize; |
183 | |
184 | // Last instruction to define CPSR in the current block. |
185 | MachineInstr *CPSRDef; |
186 | // Was CPSR last defined by a high latency instruction? |
187 | // When CPSRDef is null, this refers to CPSR defs in predecessors. |
188 | bool HighLatencyCPSR; |
189 | |
190 | struct MBBInfo { |
191 | // The flags leaving this block have high latency. |
192 | bool HighLatencyCPSR; |
193 | // Has this block been visited yet? |
194 | bool Visited; |
195 | |
196 | MBBInfo() : HighLatencyCPSR(false), Visited(false) {} |
197 | }; |
198 | |
199 | SmallVector<MBBInfo, 8> BlockInfo; |
200 | }; |
201 | char Thumb2SizeReduce::ID = 0; |
202 | } |
203 | |
204 | Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { |
205 | OptimizeSize = MinimizeSize = false; |
206 | for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { |
207 | unsigned FromOpc = ReduceTable[i].WideOpc; |
208 | if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) |
209 | assert(false && "Duplicated entries?"); |
210 | } |
211 | } |
212 | |
213 | static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { |
214 | for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) |
215 | if (*Regs == ARM::CPSR) |
216 | return true; |
217 | return false; |
218 | } |
219 | |
220 | // Check for a likely high-latency flag def. |
221 | static bool isHighLatencyCPSR(MachineInstr *Def) { |
222 | switch(Def->getOpcode()) { |
223 | case ARM::FMSTAT: |
224 | case ARM::tMUL: |
225 | return true; |
226 | } |
227 | return false; |
228 | } |
229 | |
230 | /// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, |
231 | /// the 's' 16-bit instructions partially update CPSR. Abort the |
232 | /// transformation to avoid adding a false dependency on the last CPSR-setting |
233 | /// instruction, which hurts the out-of-order execution engine's ability |
234 | /// to do register renaming. |
235 | /// This function checks if there is a read-after-write dependency between the |
236 | /// last instruction that defines the CPSR and the current instruction. If there |
237 | /// is, then there is no harm done since the instruction cannot be retired |
238 | /// before the CPSR setting instruction anyway. |
239 | /// Note, we are not doing full dependency analysis here for the sake of compile |
240 | /// time. We're not looking for cases like: |
241 | /// r0 = muls ... |
242 | /// r1 = add.w r0, ... |
243 | /// ... |
244 | /// = mul.w r1 |
245 | /// In this case it would have been ok to narrow the mul.w to muls since there |
246 | /// is an indirect RAW dependency between the muls and the mul.w. |
247 | bool |
248 | Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { |
249 | // Disable the check for -Oz (aka OptimizeForSizeHarder). |
250 | if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) |
251 | return false; |
252 | |
253 | if (!CPSRDef) |
254 | // If this BB loops back to itself, conservatively avoid narrowing the |
255 | // first instruction that does partial flag update. |
256 | return HighLatencyCPSR || FirstInSelfLoop; |
257 | |
258 | SmallSet<unsigned, 2> Defs; |
259 | for (const MachineOperand &MO : CPSRDef->operands()) { |
260 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
261 | continue; |
262 | unsigned Reg = MO.getReg(); |
263 | if (Reg == 0 || Reg == ARM::CPSR) |
264 | continue; |
265 | Defs.insert(Reg); |
266 | } |
267 | |
268 | for (const MachineOperand &MO : Use->operands()) { |
269 | if (!MO.isReg() || MO.isUndef() || MO.isDef()) |
270 | continue; |
271 | unsigned Reg = MO.getReg(); |
272 | if (Defs.count(Reg)) |
273 | return false; |
274 | } |
275 | |
276 | // If the current CPSR has high latency, try to avoid the false dependency. |
277 | if (HighLatencyCPSR) |
278 | return true; |
279 | |
280 | // tMOVi8 usually doesn't start long dependency chains, and there are a lot |
281 | // of them, so always shrink them when CPSR doesn't have high latency. |
282 | if (Use->getOpcode() == ARM::t2MOVi || |
283 | Use->getOpcode() == ARM::t2MOVi16) |
284 | return false; |
285 | |
286 | // No read-after-write dependency. The narrowing will add false dependency. |
287 | return true; |
288 | } |
289 | |
290 | bool |
291 | Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, |
292 | bool is2Addr, ARMCC::CondCodes Pred, |
293 | bool LiveCPSR, bool &HasCC, bool &CCDead) { |
294 | if ((is2Addr && Entry.PredCC2 == 0) || |
295 | (!is2Addr && Entry.PredCC1 == 0)) { |
296 | if (Pred == ARMCC::AL) { |
297 | // Not predicated, must set CPSR. |
298 | if (!HasCC) { |
299 | // Original instruction was not setting CPSR, but CPSR is not |
300 | // currently live anyway. It's ok to set it. The CPSR def is |
301 | // dead though. |
302 | if (!LiveCPSR) { |
303 | HasCC = true; |
304 | CCDead = true; |
305 | return true; |
306 | } |
307 | return false; |
308 | } |
309 | } else { |
310 | // Predicated, must not set CPSR. |
311 | if (HasCC) |
312 | return false; |
313 | } |
314 | } else if ((is2Addr && Entry.PredCC2 == 2) || |
315 | (!is2Addr && Entry.PredCC1 == 2)) { |
316 | /// Old opcode has an optional def of CPSR. |
317 | if (HasCC) |
318 | return true; |
319 | // If old opcode does not implicitly define CPSR, then it's not ok since |
320 | // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. |
321 | if (!HasImplicitCPSRDef(MI->getDesc())) |
322 | return false; |
323 | HasCC = true; |
324 | } else { |
325 | // 16-bit instruction does not set CPSR. |
326 | if (HasCC) |
327 | return false; |
328 | } |
329 | |
330 | return true; |
331 | } |
332 | |
333 | static bool VerifyLowRegs(MachineInstr *MI) { |
334 | unsigned Opc = MI->getOpcode(); |
335 | bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || |
336 | Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || |
337 | Opc == ARM::t2LDMDB_UPD); |
338 | bool isLROk = (Opc == ARM::t2STMDB_UPD); |
339 | bool isSPOk = isPCOk || isLROk; |
340 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
341 | const MachineOperand &MO = MI->getOperand(i); |
342 | if (!MO.isReg() || MO.isImplicit()) |
343 | continue; |
344 | unsigned Reg = MO.getReg(); |
345 | if (Reg == 0 || Reg == ARM::CPSR) |
346 | continue; |
347 | if (isPCOk && Reg == ARM::PC) |
348 | continue; |
349 | if (isLROk && Reg == ARM::LR) |
350 | continue; |
351 | if (Reg == ARM::SP) { |
352 | if (isSPOk) |
353 | continue; |
354 | if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) |
355 | // Special case for these ldr / str with sp as base register. |
356 | continue; |
357 | } |
358 | if (!isARMLowRegister(Reg)) |
359 | return false; |
360 | } |
361 | return true; |
362 | } |
363 | |
364 | bool |
365 | Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, |
366 | const ReduceEntry &Entry) { |
367 | if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) |
368 | return false; |
369 | |
370 | unsigned Scale = 1; |
371 | bool HasImmOffset = false; |
372 | bool HasShift = false; |
373 | bool HasOffReg = true; |
374 | bool isLdStMul = false; |
375 | unsigned Opc = Entry.NarrowOpc1; |
376 | unsigned OpNum = 3; // First 'rest' of operands. |
377 | uint8_t ImmLimit = Entry.Imm1Limit; |
378 | |
379 | switch (Entry.WideOpc) { |
380 | default: |
381 | llvm_unreachable("Unexpected Thumb2 load / store opcode!"); |
382 | case ARM::t2LDRi12: |
383 | case ARM::t2STRi12: |
384 | if (MI->getOperand(1).getReg() == ARM::SP) { |
385 | Opc = Entry.NarrowOpc2; |
386 | ImmLimit = Entry.Imm2Limit; |
387 | HasOffReg = false; |
Value stored to 'HasOffReg' is never read | |
388 | } |
389 | |
390 | Scale = 4; |
391 | HasImmOffset = true; |
392 | HasOffReg = false; |
393 | break; |
394 | case ARM::t2LDRBi12: |
395 | case ARM::t2STRBi12: |
396 | HasImmOffset = true; |
397 | HasOffReg = false; |
398 | break; |
399 | case ARM::t2LDRHi12: |
400 | case ARM::t2STRHi12: |
401 | Scale = 2; |
402 | HasImmOffset = true; |
403 | HasOffReg = false; |
404 | break; |
405 | case ARM::t2LDRs: |
406 | case ARM::t2LDRBs: |
407 | case ARM::t2LDRHs: |
408 | case ARM::t2LDRSBs: |
409 | case ARM::t2LDRSHs: |
410 | case ARM::t2STRs: |
411 | case ARM::t2STRBs: |
412 | case ARM::t2STRHs: |
413 | HasShift = true; |
414 | OpNum = 4; |
415 | break; |
416 | case ARM::t2LDMIA: |
417 | case ARM::t2LDMDB: { |
418 | unsigned BaseReg = MI->getOperand(0).getReg(); |
419 | if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) |
420 | return false; |
421 | |
422 | // For the non-writeback version (this one), the base register must be |
423 | // one of the registers being loaded. |
424 | bool isOK = false; |
425 | for (unsigned i = 4; i < MI->getNumOperands(); ++i) { |
426 | if (MI->getOperand(i).getReg() == BaseReg) { |
427 | isOK = true; |
428 | break; |
429 | } |
430 | } |
431 | |
432 | if (!isOK) |
433 | return false; |
434 | |
435 | OpNum = 0; |
436 | isLdStMul = true; |
437 | break; |
438 | } |
439 | case ARM::t2LDMIA_RET: { |
440 | unsigned BaseReg = MI->getOperand(1).getReg(); |
441 | if (BaseReg != ARM::SP) |
442 | return false; |
443 | Opc = Entry.NarrowOpc2; // tPOP_RET |
444 | OpNum = 2; |
445 | isLdStMul = true; |
446 | break; |
447 | } |
448 | case ARM::t2LDMIA_UPD: |
449 | case ARM::t2LDMDB_UPD: |
450 | case ARM::t2STMIA_UPD: |
451 | case ARM::t2STMDB_UPD: { |
452 | OpNum = 0; |
453 | |
454 | unsigned BaseReg = MI->getOperand(1).getReg(); |
455 | if (BaseReg == ARM::SP && |
456 | (Entry.WideOpc == ARM::t2LDMIA_UPD || |
457 | Entry.WideOpc == ARM::t2STMDB_UPD)) { |
458 | Opc = Entry.NarrowOpc2; // tPOP or tPUSH |
459 | OpNum = 2; |
460 | } else if (!isARMLowRegister(BaseReg) || |
461 | (Entry.WideOpc != ARM::t2LDMIA_UPD && |
462 | Entry.WideOpc != ARM::t2STMIA_UPD)) { |
463 | return false; |
464 | } |
465 | |
466 | isLdStMul = true; |
467 | break; |
468 | } |
469 | } |
470 | |
471 | unsigned OffsetReg = 0; |
472 | bool OffsetKill = false; |
473 | if (HasShift) { |
474 | OffsetReg = MI->getOperand(2).getReg(); |
475 | OffsetKill = MI->getOperand(2).isKill(); |
476 | |
477 | if (MI->getOperand(3).getImm()) |
478 | // Thumb1 addressing mode doesn't support shift. |
479 | return false; |
480 | } |
481 | |
482 | unsigned OffsetImm = 0; |
483 | if (HasImmOffset) { |
484 | OffsetImm = MI->getOperand(2).getImm(); |
485 | unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; |
486 | |
487 | if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) |
488 | // Make sure the immediate field fits. |
489 | return false; |
490 | } |
491 | |
492 | // Add the 16-bit load / store instruction. |
493 | DebugLoc dl = MI->getDebugLoc(); |
494 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); |
495 | if (!isLdStMul) { |
496 | MIB.addOperand(MI->getOperand(0)); |
497 | MIB.addOperand(MI->getOperand(1)); |
498 | |
499 | if (HasImmOffset) |
500 | MIB.addImm(OffsetImm / Scale); |
501 | |
502 | assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); |
503 | |
504 | if (HasOffReg) |
505 | MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); |
506 | } |
507 | |
508 | // Transfer the rest of operands. |
509 | for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) |
510 | MIB.addOperand(MI->getOperand(OpNum)); |
511 | |
512 | // Transfer memoperands. |
513 | MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); |
514 | |
515 | // Transfer MI flags. |
516 | MIB.setMIFlags(MI->getFlags()); |
517 | |
518 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); |
519 | |
520 | MBB.erase_instr(MI); |
521 | ++NumLdSts; |
522 | return true; |
523 | } |
524 | |
525 | bool |
526 | Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, |
527 | const ReduceEntry &Entry, |
528 | bool LiveCPSR, bool IsSelfLoop) { |
529 | unsigned Opc = MI->getOpcode(); |
530 | if (Opc == ARM::t2ADDri) { |
531 | // If the source register is SP, try to reduce to tADDrSPi, otherwise |
532 | // it's a normal reduce. |
533 | if (MI->getOperand(1).getReg() != ARM::SP) { |
534 | if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) |
535 | return true; |
536 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
537 | } |
538 | // Try to reduce to tADDrSPi. |
539 | unsigned Imm = MI->getOperand(2).getImm(); |
540 | // The immediate must be in range, the destination register must be a low |
541 | // reg, the predicate must be "always", and the instruction must not set |
542 | // the condition flags. |
543 | if (Imm & 3 || Imm > 1020) |
544 | return false; |
545 | if (!isARMLowRegister(MI->getOperand(0).getReg())) |
546 | return false; |
547 | if (MI->getOperand(3).getImm() != ARMCC::AL) |
548 | return false; |
549 | const MCInstrDesc &MCID = MI->getDesc(); |
550 | if (MCID.hasOptionalDef() && |
551 | MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) |
552 | return false; |
553 | |
554 | MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), |
555 | TII->get(ARM::tADDrSPi)) |
556 | .addOperand(MI->getOperand(0)) |
557 | .addOperand(MI->getOperand(1)) |
558 | .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. |
559 | AddDefaultPred(MIB); |
560 | |
561 | // Transfer MI flags. |
562 | MIB.setMIFlags(MI->getFlags()); |
563 | |
564 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); |
565 | |
566 | MBB.erase_instr(MI); |
567 | ++NumNarrows; |
568 | return true; |
569 | } |
570 | |
571 | if (Entry.LowRegs1 && !VerifyLowRegs(MI)) |
572 | return false; |
573 | |
574 | if (MI->mayLoad() || MI->mayStore()) |
575 | return ReduceLoadStore(MBB, MI, Entry); |
576 | |
577 | switch (Opc) { |
578 | default: break; |
579 | case ARM::t2ADDSri: |
580 | case ARM::t2ADDSrr: { |
581 | unsigned PredReg = 0; |
582 | if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { |
583 | switch (Opc) { |
584 | default: break; |
585 | case ARM::t2ADDSri: { |
586 | if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) |
587 | return true; |
588 | // fallthrough |
589 | } |
590 | case ARM::t2ADDSrr: |
591 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
592 | } |
593 | } |
594 | break; |
595 | } |
596 | case ARM::t2RSBri: |
597 | case ARM::t2RSBSri: |
598 | case ARM::t2SXTB: |
599 | case ARM::t2SXTH: |
600 | case ARM::t2UXTB: |
601 | case ARM::t2UXTH: |
602 | if (MI->getOperand(2).getImm() == 0) |
603 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
604 | break; |
605 | case ARM::t2MOVi16: |
606 | // Can convert only 'pure' immediate operands, not immediates obtained as |
607 | // globals' addresses. |
608 | if (MI->getOperand(1).isImm()) |
609 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
610 | break; |
611 | case ARM::t2CMPrr: { |
612 | // Try to reduce to the lo-reg only version first. Why there are two |
613 | // versions of the instruction is a mystery. |
614 | // It would be nice to just have two entries in the master table that |
615 | // are prioritized, but the table assumes a unique entry for each |
616 | // source insn opcode. So for now, we hack a local entry record to use. |
617 | static const ReduceEntry NarrowEntry = |
618 | { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; |
619 | if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) |
620 | return true; |
621 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
622 | } |
623 | } |
624 | return false; |
625 | } |
626 | |
627 | bool |
628 | Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, |
629 | const ReduceEntry &Entry, |
630 | bool LiveCPSR, bool IsSelfLoop) { |
631 | |
632 | if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) |
633 | return false; |
634 | |
635 | if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && |
636 | STI->avoidMOVsShifterOperand()) |
637 | // Don't issue movs with shifter operand for some CPUs unless we |
638 | // are optimizing / minimizing for size. |
639 | return false; |
640 | |
641 | unsigned Reg0 = MI->getOperand(0).getReg(); |
642 | unsigned Reg1 = MI->getOperand(1).getReg(); |
643 | // t2MUL is "special". The tied source operand is second, not first. |
644 | if (MI->getOpcode() == ARM::t2MUL) { |
645 | unsigned Reg2 = MI->getOperand(2).getReg(); |
646 | // Early exit if the regs aren't all low regs. |
647 | if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) |
648 | || !isARMLowRegister(Reg2)) |
649 | return false; |
650 | if (Reg0 != Reg2) { |
651 | // If the other operand also isn't the same as the destination, we |
652 | // can't reduce. |
653 | if (Reg1 != Reg0) |
654 | return false; |
655 | // Try to commute the operands to make it a 2-address instruction. |
656 | MachineInstr *CommutedMI = TII->commuteInstruction(MI); |
657 | if (!CommutedMI) |
658 | return false; |
659 | } |
660 | } else if (Reg0 != Reg1) { |
661 | // Try to commute the operands to make it a 2-address instruction. |
662 | unsigned CommOpIdx1, CommOpIdx2; |
663 | if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || |
664 | CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) |
665 | return false; |
666 | MachineInstr *CommutedMI = TII->commuteInstruction(MI); |
667 | if (!CommutedMI) |
668 | return false; |
669 | } |
670 | if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) |
671 | return false; |
672 | if (Entry.Imm2Limit) { |
673 | unsigned Imm = MI->getOperand(2).getImm(); |
674 | unsigned Limit = (1 << Entry.Imm2Limit) - 1; |
675 | if (Imm > Limit) |
676 | return false; |
677 | } else { |
678 | unsigned Reg2 = MI->getOperand(2).getReg(); |
679 | if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) |
680 | return false; |
681 | } |
682 | |
683 | // Check if it's possible / necessary to transfer the predicate. |
684 | const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); |
685 | unsigned PredReg = 0; |
686 | ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); |
687 | bool SkipPred = false; |
688 | if (Pred != ARMCC::AL) { |
689 | if (!NewMCID.isPredicable()) |
690 | // Can't transfer predicate, fail. |
691 | return false; |
692 | } else { |
693 | SkipPred = !NewMCID.isPredicable(); |
694 | } |
695 | |
696 | bool HasCC = false; |
697 | bool CCDead = false; |
698 | const MCInstrDesc &MCID = MI->getDesc(); |
699 | if (MCID.hasOptionalDef()) { |
700 | unsigned NumOps = MCID.getNumOperands(); |
701 | HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); |
702 | if (HasCC && MI->getOperand(NumOps-1).isDead()) |
703 | CCDead = true; |
704 | } |
705 | if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) |
706 | return false; |
707 | |
708 | // Avoid adding a false dependency on partial flag update by some 16-bit |
709 | // instructions which have the 's' bit set. |
710 | if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && |
711 | canAddPseudoFlagDep(MI, IsSelfLoop)) |
712 | return false; |
713 | |
714 | // Add the 16-bit instruction. |
715 | DebugLoc dl = MI->getDebugLoc(); |
716 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); |
717 | MIB.addOperand(MI->getOperand(0)); |
718 | if (NewMCID.hasOptionalDef()) { |
719 | if (HasCC) |
720 | AddDefaultT1CC(MIB, CCDead); |
721 | else |
722 | AddNoT1CC(MIB); |
723 | } |
724 | |
725 | // Transfer the rest of operands. |
726 | unsigned NumOps = MCID.getNumOperands(); |
727 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { |
728 | if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) |
729 | continue; |
730 | if (SkipPred && MCID.OpInfo[i].isPredicate()) |
731 | continue; |
732 | MIB.addOperand(MI->getOperand(i)); |
733 | } |
734 | |
735 | // Transfer MI flags. |
736 | MIB.setMIFlags(MI->getFlags()); |
737 | |
738 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); |
739 | |
740 | MBB.erase_instr(MI); |
741 | ++Num2Addrs; |
742 | return true; |
743 | } |
744 | |
745 | bool |
746 | Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, |
747 | const ReduceEntry &Entry, |
748 | bool LiveCPSR, bool IsSelfLoop) { |
749 | if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) |
750 | return false; |
751 | |
752 | if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && |
753 | STI->avoidMOVsShifterOperand()) |
754 | // Don't issue movs with shifter operand for some CPUs unless we |
755 | // are optimizing / minimizing for size. |
756 | return false; |
757 | |
758 | unsigned Limit = ~0U; |
759 | if (Entry.Imm1Limit) |
760 | Limit = (1 << Entry.Imm1Limit) - 1; |
761 | |
762 | const MCInstrDesc &MCID = MI->getDesc(); |
763 | for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { |
764 | if (MCID.OpInfo[i].isPredicate()) |
765 | continue; |
766 | const MachineOperand &MO = MI->getOperand(i); |
767 | if (MO.isReg()) { |
768 | unsigned Reg = MO.getReg(); |
769 | if (!Reg || Reg == ARM::CPSR) |
770 | continue; |
771 | if (Entry.LowRegs1 && !isARMLowRegister(Reg)) |
772 | return false; |
773 | } else if (MO.isImm() && |
774 | !MCID.OpInfo[i].isPredicate()) { |
775 | if (((unsigned)MO.getImm()) > Limit) |
776 | return false; |
777 | } |
778 | } |
779 | |
780 | // Check if it's possible / necessary to transfer the predicate. |
781 | const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); |
782 | unsigned PredReg = 0; |
783 | ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); |
784 | bool SkipPred = false; |
785 | if (Pred != ARMCC::AL) { |
786 | if (!NewMCID.isPredicable()) |
787 | // Can't transfer predicate, fail. |
788 | return false; |
789 | } else { |
790 | SkipPred = !NewMCID.isPredicable(); |
791 | } |
792 | |
793 | bool HasCC = false; |
794 | bool CCDead = false; |
795 | if (MCID.hasOptionalDef()) { |
796 | unsigned NumOps = MCID.getNumOperands(); |
797 | HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); |
798 | if (HasCC && MI->getOperand(NumOps-1).isDead()) |
799 | CCDead = true; |
800 | } |
801 | if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) |
802 | return false; |
803 | |
804 | // Avoid adding a false dependency on partial flag update by some 16-bit |
805 | // instructions which have the 's' bit set. |
806 | if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && |
807 | canAddPseudoFlagDep(MI, IsSelfLoop)) |
808 | return false; |
809 | |
810 | // Add the 16-bit instruction. |
811 | DebugLoc dl = MI->getDebugLoc(); |
812 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); |
813 | MIB.addOperand(MI->getOperand(0)); |
814 | if (NewMCID.hasOptionalDef()) { |
815 | if (HasCC) |
816 | AddDefaultT1CC(MIB, CCDead); |
817 | else |
818 | AddNoT1CC(MIB); |
819 | } |
820 | |
821 | // Transfer the rest of operands. |
822 | unsigned NumOps = MCID.getNumOperands(); |
823 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { |
824 | if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) |
825 | continue; |
826 | if ((MCID.getOpcode() == ARM::t2RSBSri || |
827 | MCID.getOpcode() == ARM::t2RSBri || |
828 | MCID.getOpcode() == ARM::t2SXTB || |
829 | MCID.getOpcode() == ARM::t2SXTH || |
830 | MCID.getOpcode() == ARM::t2UXTB || |
831 | MCID.getOpcode() == ARM::t2UXTH) && i == 2) |
832 | // Skip the zero immediate operand, it's now implicit. |
833 | continue; |
834 | bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); |
835 | if (SkipPred && isPred) |
836 | continue; |
837 | const MachineOperand &MO = MI->getOperand(i); |
838 | if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) |
839 | // Skip implicit def of CPSR. Either it's modeled as an optional |
840 | // def now or it's already an implicit def on the new instruction. |
841 | continue; |
842 | MIB.addOperand(MO); |
843 | } |
844 | if (!MCID.isPredicable() && NewMCID.isPredicable()) |
845 | AddDefaultPred(MIB); |
846 | |
847 | // Transfer MI flags. |
848 | MIB.setMIFlags(MI->getFlags()); |
849 | |
850 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); |
851 | |
852 | MBB.erase_instr(MI); |
853 | ++NumNarrows; |
854 | return true; |
855 | } |
856 | |
857 | static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { |
858 | bool HasDef = false; |
859 | for (const MachineOperand &MO : MI.operands()) { |
860 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
861 | continue; |
862 | if (MO.getReg() != ARM::CPSR) |
863 | continue; |
864 | |
865 | DefCPSR = true; |
866 | if (!MO.isDead()) |
867 | HasDef = true; |
868 | } |
869 | |
870 | return HasDef || LiveCPSR; |
871 | } |
872 | |
873 | static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { |
874 | for (const MachineOperand &MO : MI.operands()) { |
875 | if (!MO.isReg() || MO.isUndef() || MO.isDef()) |
876 | continue; |
877 | if (MO.getReg() != ARM::CPSR) |
878 | continue; |
879 | assert(LiveCPSR && "CPSR liveness tracking is wrong!"); |
880 | if (MO.isKill()) { |
881 | LiveCPSR = false; |
882 | break; |
883 | } |
884 | } |
885 | |
886 | return LiveCPSR; |
887 | } |
888 | |
889 | bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, |
890 | bool LiveCPSR, bool IsSelfLoop) { |
891 | unsigned Opcode = MI->getOpcode(); |
892 | DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); |
893 | if (OPI == ReduceOpcodeMap.end()) |
894 | return false; |
895 | const ReduceEntry &Entry = ReduceTable[OPI->second]; |
896 | |
897 | // Don't attempt normal reductions on "special" cases for now. |
898 | if (Entry.Special) |
899 | return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop); |
900 | |
901 | // Try to transform to a 16-bit two-address instruction. |
902 | if (Entry.NarrowOpc2 && |
903 | ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) |
904 | return true; |
905 | |
906 | // Try to transform to a 16-bit non-two-address instruction. |
907 | if (Entry.NarrowOpc1 && |
908 | ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) |
909 | return true; |
910 | |
911 | return false; |
912 | } |
913 | |
914 | bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { |
915 | bool Modified = false; |
916 | |
917 | // Yes, CPSR could be livein. |
918 | bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); |
919 | MachineInstr *BundleMI = nullptr; |
920 | |
921 | CPSRDef = nullptr; |
922 | HighLatencyCPSR = false; |
923 | |
924 | // Check predecessors for the latest CPSRDef. |
925 | for (auto *Pred : MBB.predecessors()) { |
926 | const MBBInfo &PInfo = BlockInfo[Pred->getNumber()]; |
927 | if (!PInfo.Visited) { |
928 | // Since blocks are visited in RPO, this must be a back-edge. |
929 | continue; |
930 | } |
931 | if (PInfo.HighLatencyCPSR) { |
932 | HighLatencyCPSR = true; |
933 | break; |
934 | } |
935 | } |
936 | |
937 | // If this BB loops back to itself, conservatively avoid narrowing the |
938 | // first instruction that does partial flag update. |
939 | bool IsSelfLoop = MBB.isSuccessor(&MBB); |
940 | MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); |
941 | MachineBasicBlock::instr_iterator NextMII; |
942 | for (; MII != E; MII = NextMII) { |
943 | NextMII = std::next(MII); |
944 | |
945 | MachineInstr *MI = &*MII; |
946 | if (MI->isBundle()) { |
947 | BundleMI = MI; |
948 | continue; |
949 | } |
950 | if (MI->isDebugValue()) |
951 | continue; |
952 | |
953 | LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); |
954 | |
955 | // Does NextMII belong to the same bundle as MI? |
956 | bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); |
957 | |
958 | if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { |
959 | Modified = true; |
960 | MachineBasicBlock::instr_iterator I = std::prev(NextMII); |
961 | MI = &*I; |
962 | // Removing and reinserting the first instruction in a bundle will break |
963 | // up the bundle. Fix the bundling if it was broken. |
964 | if (NextInSameBundle && !NextMII->isBundledWithPred()) |
965 | NextMII->bundleWithPred(); |
966 | } |
967 | |
968 | if (!NextInSameBundle && MI->isInsideBundle()) { |
969 | // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill |
970 | // marker is only on the BUNDLE instruction. Process the BUNDLE |
971 | // instruction as we finish with the bundled instruction to work around |
972 | // the inconsistency. |
973 | if (BundleMI->killsRegister(ARM::CPSR)) |
974 | LiveCPSR = false; |
975 | MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); |
976 | if (MO && !MO->isDead()) |
977 | LiveCPSR = true; |
978 | MO = BundleMI->findRegisterUseOperand(ARM::CPSR); |
979 | if (MO && !MO->isKill()) |
980 | LiveCPSR = true; |
981 | } |
982 | |
983 | bool DefCPSR = false; |
984 | LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); |
985 | if (MI->isCall()) { |
986 | // Calls don't really set CPSR. |
987 | CPSRDef = nullptr; |
988 | HighLatencyCPSR = false; |
989 | IsSelfLoop = false; |
990 | } else if (DefCPSR) { |
991 | // This is the last CPSR defining instruction. |
992 | CPSRDef = MI; |
993 | HighLatencyCPSR = isHighLatencyCPSR(CPSRDef); |
994 | IsSelfLoop = false; |
995 | } |
996 | } |
997 | |
998 | MBBInfo &Info = BlockInfo[MBB.getNumber()]; |
999 | Info.HighLatencyCPSR = HighLatencyCPSR; |
1000 | Info.Visited = true; |
1001 | return Modified; |
1002 | } |
1003 | |
1004 | bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { |
1005 | const TargetMachine &TM = MF.getTarget(); |
1006 | TII = static_cast<const Thumb2InstrInfo *>( |
1007 | TM.getSubtargetImpl()->getInstrInfo()); |
1008 | STI = &TM.getSubtarget<ARMSubtarget>(); |
1009 | |
1010 | // Optimizing / minimizing size? |
1011 | AttributeSet FnAttrs = MF.getFunction()->getAttributes(); |
1012 | OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, |
1013 | Attribute::OptimizeForSize); |
1014 | MinimizeSize = |
1015 | FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); |
1016 | |
1017 | BlockInfo.clear(); |
1018 | BlockInfo.resize(MF.getNumBlockIDs()); |
1019 | |
1020 | // Visit blocks in reverse post-order so CPSRDef is known for all |
1021 | // predecessors. |
1022 | ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); |
1023 | bool Modified = false; |
1024 | for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator |
1025 | I = RPOT.begin(), E = RPOT.end(); I != E; ++I) |
1026 | Modified |= ReduceMBB(**I); |
1027 | return Modified; |
1028 | } |
1029 | |
1030 | /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size |
1031 | /// reduction pass. |
1032 | FunctionPass *llvm::createThumb2SizeReductionPass() { |
1033 | return new Thumb2SizeReduce(); |
1034 | } |