File: lib/Target/ARM/Thumb2SizeReduction.cpp
Location: line 996, column 11
Description: Called C++ object pointer is null
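
The flagged call is in Thumb2SizeReduce::ReduceMBB. BundleMI is initialized to nullptr (line 942) and is only assigned when a BUNDLE marker instruction is encountered (line 970), yet it is dereferenced at line 996 whenever the current instruction is inside a bundle. If an in-bundle instruction could ever be reached without a preceding BUNDLE marker in the same block, the killsRegister() call would go through a null pointer. A minimal defensive sketch (illustration only, not necessarily the correct upstream fix) would guard the dereference:

    if (BundleMI && BundleMI->killsRegister(ARM::CPSR))
      LiveCPSR = false;

If the bundling invariant guarantees that BundleMI is non-null at this point, asserting that invariant before the dereference would document it and satisfy the checker equally well.
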
1 | //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | ||||
10 | #include "ARM.h" | |||
11 | #include "ARMBaseInstrInfo.h" | |||
12 | #include "ARMSubtarget.h" | |||
13 | #include "MCTargetDesc/ARMAddressingModes.h" | |||
14 | #include "Thumb2InstrInfo.h" | |||
15 | #include "llvm/ADT/DenseMap.h" | |||
16 | #include "llvm/ADT/PostOrderIterator.h" | |||
17 | #include "llvm/ADT/Statistic.h" | |||
18 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
19 | #include "llvm/CodeGen/MachineInstr.h" | |||
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
21 | #include "llvm/IR/Function.h" // To access Function attributes | |||
22 | #include "llvm/Support/CommandLine.h" | |||
23 | #include "llvm/Support/Debug.h" | |||
24 | #include "llvm/Support/raw_ostream.h" | |||
25 | #include "llvm/Target/TargetMachine.h" | |||
26 | using namespace llvm; | |||
27 | ||||
28 | #define DEBUG_TYPE "t2-reduce-size"
29 | ||||
30 | STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
31 | STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
32 | STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
33 | ||||
34 | static cl::opt<int> ReduceLimit("t2-reduce-limit", | |||
35 | cl::init(-1), cl::Hidden); | |||
36 | static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", | |||
37 | cl::init(-1), cl::Hidden); | |||
38 | static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", | |||
39 | cl::init(-1), cl::Hidden); | |||
40 | ||||
41 | namespace { | |||
42 | /// ReduceTable - A static table with information on mapping from wide
43 | /// opcodes to narrow ones.
44 | struct ReduceEntry { | |||
45 | uint16_t WideOpc; // Wide opcode | |||
46 | uint16_t NarrowOpc1; // Narrow opcode to transform to | |||
47 | uint16_t NarrowOpc2; // Narrow opcode when it's two-address | |||
48 | uint8_t Imm1Limit; // Limit of immediate field (bits) | |||
49 | uint8_t Imm2Limit; // Limit of immediate field when it's two-address | |||
50 | unsigned LowRegs1 : 1; // Only possible if low-registers are used | |||
51 | unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) | |||
52 | unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. | |||
53 | // 1 - No cc field. | |||
54 | // 2 - Always set CPSR. | |||
55 | unsigned PredCC2 : 2; | |||
56 | unsigned PartFlag : 1; // 16-bit instruction does partial flag update | |||
57 | unsigned Special : 1; // Needs to be dealt with specially | |||
58 | unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) | |||
59 | }; | |||
60 | ||||
61 | static const ReduceEntry ReduceTable[] = { | |||
62 | // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM | |||
63 | { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, | |||
64 | { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, | |||
65 | { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, | |||
66 | { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, | |||
67 | { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, | |||
68 | { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
69 | { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, | |||
70 | { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, | |||
71 | { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
72 | //FIXME: Disable CMN, as CCodes are backwards from compare expectations | |||
73 | //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, | |||
74 | { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, | |||
75 | { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, | |||
76 | { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, | |||
77 | { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
78 | // FIXME: adr.n immediate offset must be multiple of 4. | |||
79 | //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, | |||
80 | { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, | |||
81 | { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, | |||
82 | { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, | |||
83 | { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, | |||
84 | { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 }, | |||
85 | { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 }, | |||
86 | // FIXME: Do we need the 16-bit 'S' variant? | |||
87 | { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, | |||
88 | { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
89 | { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, | |||
90 | { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
91 | { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, | |||
92 | { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, | |||
93 | { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, | |||
94 | { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, | |||
95 | { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
96 | { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, | |||
97 | { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, | |||
98 | { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, | |||
99 | { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, | |||
100 | { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, | |||
101 | { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, | |||
102 | { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, | |||
103 | { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, | |||
104 | { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, | |||
105 | { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, | |||
106 | { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, | |||
107 | ||||
108 | // FIXME: Clean this up after splitting each Thumb load / store opcode | |||
109 | // into multiple ones. | |||
110 | { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, | |||
111 | { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
112 | { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, | |||
113 | { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
114 | { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, | |||
115 | { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
116 | { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
117 | { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
118 | { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, | |||
119 | { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
120 | { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, | |||
121 | { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
122 | { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, | |||
123 | { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, | |||
124 | ||||
125 | { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, | |||
126 | { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, | |||
127 | { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, | |||
128 | // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent. | |||
129 | // tSTMIA_UPD is a change in semantics which can only be used if the base | |||
130 | // register is killed. This difference is correctly handled elsewhere. | |||
131 | { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, | |||
132 | { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, | |||
133 | { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } | |||
134 | }; | |||
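// For example, the t2ADDri row above reads as follows: ARM::t2ADDri narrows to
// tADDi3 (3-bit immediate) or, in two-address form, to tADDi8 (8-bit
// immediate); both forms require low registers (lo1 = lo2 = 1); PredCC1 and
// PredCC2 are 0, so CPSR updating is tied to whether the instruction is
// predicated; it does no partial flag update; and it is marked Special so that
// ReduceSpecial can handle the SP-source case (tADDrSPi) separately.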
135 | ||||
136 | class Thumb2SizeReduce : public MachineFunctionPass { | |||
137 | public: | |||
138 | static char ID; | |||
139 | Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); | |||
140 | ||||
141 | const Thumb2InstrInfo *TII; | |||
142 | const ARMSubtarget *STI; | |||
143 | ||||
144 | bool runOnMachineFunction(MachineFunction &MF) override; | |||
145 | ||||
146 | MachineFunctionProperties getRequiredProperties() const override { | |||
147 | return MachineFunctionProperties().set( | |||
148 | MachineFunctionProperties::Property::AllVRegsAllocated); | |||
149 | } | |||
150 | ||||
151 | const char *getPassName() const override { | |||
152 | return "Thumb2 instruction size reduction pass"; | |||
153 | } | |||
154 | ||||
155 | private: | |||
156 | /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. | |||
157 | DenseMap<unsigned, unsigned> ReduceOpcodeMap; | |||
158 | ||||
159 | bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop); | |||
160 | ||||
161 | bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, | |||
162 | bool is2Addr, ARMCC::CondCodes Pred, | |||
163 | bool LiveCPSR, bool &HasCC, bool &CCDead); | |||
164 | ||||
165 | bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, | |||
166 | const ReduceEntry &Entry); | |||
167 | ||||
168 | bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, | |||
169 | const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop); | |||
170 | ||||
171 | /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address | |||
172 | /// instruction. | |||
173 | bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, | |||
174 | const ReduceEntry &Entry, bool LiveCPSR, | |||
175 | bool IsSelfLoop); | |||
176 | ||||
177 | /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit | |||
178 | /// non-two-address instruction. | |||
179 | bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, | |||
180 | const ReduceEntry &Entry, bool LiveCPSR, | |||
181 | bool IsSelfLoop); | |||
182 | ||||
183 | /// ReduceMI - Attempt to reduce MI, return true on success. | |||
184 | bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, | |||
185 | bool LiveCPSR, bool IsSelfLoop); | |||
186 | ||||
187 | /// ReduceMBB - Reduce width of instructions in the specified basic block. | |||
188 | bool ReduceMBB(MachineBasicBlock &MBB); | |||
189 | ||||
190 | bool OptimizeSize; | |||
191 | bool MinimizeSize; | |||
192 | ||||
193 | // Last instruction to define CPSR in the current block. | |||
194 | MachineInstr *CPSRDef; | |||
195 | // Was CPSR last defined by a high latency instruction? | |||
196 | // When CPSRDef is null, this refers to CPSR defs in predecessors. | |||
197 | bool HighLatencyCPSR; | |||
198 | ||||
199 | struct MBBInfo { | |||
200 | // The flags leaving this block have high latency. | |||
201 | bool HighLatencyCPSR; | |||
202 | // Has this block been visited yet? | |||
203 | bool Visited; | |||
204 | ||||
205 | MBBInfo() : HighLatencyCPSR(false), Visited(false) {} | |||
206 | }; | |||
207 | ||||
208 | SmallVector<MBBInfo, 8> BlockInfo; | |||
209 | ||||
210 | std::function<bool(const Function &)> PredicateFtor; | |||
211 | }; | |||
212 | char Thumb2SizeReduce::ID = 0; | |||
213 | } | |||
214 | ||||
215 | Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) | |||
216 | : MachineFunctionPass(ID), PredicateFtor(Ftor) { | |||
217 | OptimizeSize = MinimizeSize = false; | |||
218 | for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { | |||
219 | unsigned FromOpc = ReduceTable[i].WideOpc; | |||
220 | if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) | |||
221 | llvm_unreachable("Duplicated entries?");
222 | } | |||
223 | } | |||
224 | ||||
225 | static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { | |||
226 | for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) | |||
227 | if (*Regs == ARM::CPSR) | |||
228 | return true; | |||
229 | return false; | |||
230 | } | |||
231 | ||||
232 | // Check for a likely high-latency flag def. | |||
233 | static bool isHighLatencyCPSR(MachineInstr *Def) { | |||
234 | switch(Def->getOpcode()) { | |||
235 | case ARM::FMSTAT: | |||
236 | case ARM::tMUL: | |||
237 | return true; | |||
238 | } | |||
239 | return false; | |||
240 | } | |||
241 | ||||
242 | /// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, | |||
243 | /// the 's' 16-bit instructions only partially update CPSR. Abort the
244 | /// transformation to avoid adding a false dependency on the last CPSR-setting
245 | /// instruction, which hurts the out-of-order execution engine's ability
246 | /// to do register renaming magic.
247 | /// This function checks if there is a read-after-write dependency between the
248 | /// last instruction that defines the CPSR and the current instruction. If there | |||
249 | /// is, then there is no harm done since the instruction cannot be retired | |||
250 | /// before the CPSR setting instruction anyway. | |||
251 | /// Note, we are not doing full dependency analysis here for the sake of compile | |||
252 | /// time. We're not looking for cases like: | |||
253 | /// r0 = muls ... | |||
254 | /// r1 = add.w r0, ... | |||
255 | /// ... | |||
256 | /// = mul.w r1 | |||
257 | /// In this case it would have been ok to narrow the mul.w to muls since there | |||
258 | /// is an indirect RAW dependency between the muls and the mul.w.
259 | bool | |||
260 | Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { | |||
261 | // Disable the check for -Oz (aka OptimizeForSizeHarder). | |||
262 | if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) | |||
263 | return false; | |||
264 | ||||
265 | if (!CPSRDef) | |||
266 | // If this BB loops back to itself, conservatively avoid narrowing the | |||
267 | // first instruction that does partial flag update. | |||
268 | return HighLatencyCPSR || FirstInSelfLoop; | |||
269 | ||||
270 | SmallSet<unsigned, 2> Defs; | |||
271 | for (const MachineOperand &MO : CPSRDef->operands()) { | |||
272 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) | |||
273 | continue; | |||
274 | unsigned Reg = MO.getReg(); | |||
275 | if (Reg == 0 || Reg == ARM::CPSR) | |||
276 | continue; | |||
277 | Defs.insert(Reg); | |||
278 | } | |||
279 | ||||
280 | for (const MachineOperand &MO : Use->operands()) { | |||
281 | if (!MO.isReg() || MO.isUndef() || MO.isDef()) | |||
282 | continue; | |||
283 | unsigned Reg = MO.getReg(); | |||
284 | if (Defs.count(Reg)) | |||
285 | return false; | |||
286 | } | |||
287 | ||||
288 | // If the current CPSR has high latency, try to avoid the false dependency. | |||
289 | if (HighLatencyCPSR) | |||
290 | return true; | |||
291 | ||||
292 | // tMOVi8 usually doesn't start long dependency chains, and there are a lot | |||
293 | // of them, so always shrink them when CPSR doesn't have high latency. | |||
294 | if (Use->getOpcode() == ARM::t2MOVi || | |||
295 | Use->getOpcode() == ARM::t2MOVi16) | |||
296 | return false; | |||
297 | ||||
298 | // No read-after-write dependency. The narrowing will add false dependency. | |||
299 | return true; | |||
300 | } | |||
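// Worked example of the check above: if CPSRDef is "subs r0, r1, #1" and Use
// is the wide "add.w r2, r0, #1" being considered for narrowing, Use reads r0,
// which CPSRDef defines, so a true RAW dependency already orders the two
// instructions and the function returns false (narrowing is harmless).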
301 | ||||
302 | bool | |||
303 | Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, | |||
304 | bool is2Addr, ARMCC::CondCodes Pred, | |||
305 | bool LiveCPSR, bool &HasCC, bool &CCDead) { | |||
306 | if ((is2Addr && Entry.PredCC2 == 0) || | |||
307 | (!is2Addr && Entry.PredCC1 == 0)) { | |||
308 | if (Pred == ARMCC::AL) { | |||
309 | // Not predicated, must set CPSR. | |||
310 | if (!HasCC) { | |||
311 | // Original instruction was not setting CPSR, but CPSR is not | |||
312 | // currently live anyway. It's ok to set it. The CPSR def is | |||
313 | // dead though. | |||
314 | if (!LiveCPSR) { | |||
315 | HasCC = true; | |||
316 | CCDead = true; | |||
317 | return true; | |||
318 | } | |||
319 | return false; | |||
320 | } | |||
321 | } else { | |||
322 | // Predicated, must not set CPSR. | |||
323 | if (HasCC) | |||
324 | return false; | |||
325 | } | |||
326 | } else if ((is2Addr && Entry.PredCC2 == 2) || | |||
327 | (!is2Addr && Entry.PredCC1 == 2)) { | |||
328 | /// Old opcode has an optional def of CPSR. | |||
329 | if (HasCC) | |||
330 | return true; | |||
331 | // If old opcode does not implicitly define CPSR, then it's not ok since | |||
332 | // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. | |||
333 | if (!HasImplicitCPSRDef(MI->getDesc())) | |||
334 | return false; | |||
335 | HasCC = true; | |||
336 | } else { | |||
337 | // 16-bit instruction does not set CPSR. | |||
338 | if (HasCC) | |||
339 | return false; | |||
340 | } | |||
341 | ||||
342 | return true; | |||
343 | } | |||
344 | ||||
345 | static bool VerifyLowRegs(MachineInstr *MI) { | |||
346 | unsigned Opc = MI->getOpcode(); | |||
347 | bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD); | |||
348 | bool isLROk = (Opc == ARM::t2STMDB_UPD); | |||
349 | bool isSPOk = isPCOk || isLROk; | |||
350 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
351 | const MachineOperand &MO = MI->getOperand(i); | |||
352 | if (!MO.isReg() || MO.isImplicit()) | |||
353 | continue; | |||
354 | unsigned Reg = MO.getReg(); | |||
355 | if (Reg == 0 || Reg == ARM::CPSR) | |||
356 | continue; | |||
357 | if (isPCOk && Reg == ARM::PC) | |||
358 | continue; | |||
359 | if (isLROk && Reg == ARM::LR) | |||
360 | continue; | |||
361 | if (Reg == ARM::SP) { | |||
362 | if (isSPOk) | |||
363 | continue; | |||
364 | if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) | |||
365 | // Special case for these ldr / str with sp as base register. | |||
366 | continue; | |||
367 | } | |||
368 | if (!isARMLowRegister(Reg)) | |||
369 | return false; | |||
370 | } | |||
371 | return true; | |||
372 | } | |||
373 | ||||
374 | bool | |||
375 | Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, | |||
376 | const ReduceEntry &Entry) { | |||
377 | if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) | |||
378 | return false; | |||
379 | ||||
380 | unsigned Scale = 1; | |||
381 | bool HasImmOffset = false; | |||
382 | bool HasShift = false; | |||
383 | bool HasOffReg = true; | |||
384 | bool isLdStMul = false; | |||
385 | unsigned Opc = Entry.NarrowOpc1; | |||
386 | unsigned OpNum = 3; // First 'rest' of operands. | |||
387 | uint8_t ImmLimit = Entry.Imm1Limit; | |||
388 | ||||
389 | switch (Entry.WideOpc) { | |||
390 | default: | |||
391 | llvm_unreachable("Unexpected Thumb2 load / store opcode!");
392 | case ARM::t2LDRi12: | |||
393 | case ARM::t2STRi12: | |||
394 | if (MI->getOperand(1).getReg() == ARM::SP) { | |||
395 | Opc = Entry.NarrowOpc2; | |||
396 | ImmLimit = Entry.Imm2Limit; | |||
397 | } | |||
398 | ||||
399 | Scale = 4; | |||
400 | HasImmOffset = true; | |||
401 | HasOffReg = false; | |||
402 | break; | |||
403 | case ARM::t2LDRBi12: | |||
404 | case ARM::t2STRBi12: | |||
405 | HasImmOffset = true; | |||
406 | HasOffReg = false; | |||
407 | break; | |||
408 | case ARM::t2LDRHi12: | |||
409 | case ARM::t2STRHi12: | |||
410 | Scale = 2; | |||
411 | HasImmOffset = true; | |||
412 | HasOffReg = false; | |||
413 | break; | |||
414 | case ARM::t2LDRs: | |||
415 | case ARM::t2LDRBs: | |||
416 | case ARM::t2LDRHs: | |||
417 | case ARM::t2LDRSBs: | |||
418 | case ARM::t2LDRSHs: | |||
419 | case ARM::t2STRs: | |||
420 | case ARM::t2STRBs: | |||
421 | case ARM::t2STRHs: | |||
422 | HasShift = true; | |||
423 | OpNum = 4; | |||
424 | break; | |||
425 | case ARM::t2LDMIA: { | |||
426 | unsigned BaseReg = MI->getOperand(0).getReg(); | |||
427 | assert(isARMLowRegister(BaseReg));
428 | ||||
429 | // For the non-writeback version (this one), the base register must be | |||
430 | // one of the registers being loaded. | |||
431 | bool isOK = false; | |||
432 | for (unsigned i = 3; i < MI->getNumOperands(); ++i) { | |||
433 | if (MI->getOperand(i).getReg() == BaseReg) { | |||
434 | isOK = true; | |||
435 | break; | |||
436 | } | |||
437 | } | |||
438 | ||||
439 | if (!isOK) | |||
440 | return false; | |||
441 | ||||
442 | OpNum = 0; | |||
443 | isLdStMul = true; | |||
444 | break; | |||
445 | } | |||
446 | case ARM::t2STMIA: { | |||
447 | // If the base register is killed, we don't care what its value is after the | |||
448 | // instruction, so we can use an updating STMIA. | |||
449 | if (!MI->getOperand(0).isKill()) | |||
450 | return false; | |||
451 | ||||
452 | break; | |||
453 | } | |||
454 | case ARM::t2LDMIA_RET: { | |||
455 | unsigned BaseReg = MI->getOperand(1).getReg(); | |||
456 | if (BaseReg != ARM::SP) | |||
457 | return false; | |||
458 | Opc = Entry.NarrowOpc2; // tPOP_RET | |||
459 | OpNum = 2; | |||
460 | isLdStMul = true; | |||
461 | break; | |||
462 | } | |||
463 | case ARM::t2LDMIA_UPD: | |||
464 | case ARM::t2STMIA_UPD: | |||
465 | case ARM::t2STMDB_UPD: { | |||
466 | OpNum = 0; | |||
467 | ||||
468 | unsigned BaseReg = MI->getOperand(1).getReg(); | |||
469 | if (BaseReg == ARM::SP && | |||
470 | (Entry.WideOpc == ARM::t2LDMIA_UPD || | |||
471 | Entry.WideOpc == ARM::t2STMDB_UPD)) { | |||
472 | Opc = Entry.NarrowOpc2; // tPOP or tPUSH | |||
473 | OpNum = 2; | |||
474 | } else if (!isARMLowRegister(BaseReg) || | |||
475 | (Entry.WideOpc != ARM::t2LDMIA_UPD && | |||
476 | Entry.WideOpc != ARM::t2STMIA_UPD)) { | |||
477 | return false; | |||
478 | } | |||
479 | ||||
480 | isLdStMul = true; | |||
481 | break; | |||
482 | } | |||
483 | } | |||
484 | ||||
485 | unsigned OffsetReg = 0; | |||
486 | bool OffsetKill = false; | |||
487 | bool OffsetInternal = false; | |||
488 | if (HasShift) { | |||
489 | OffsetReg = MI->getOperand(2).getReg(); | |||
490 | OffsetKill = MI->getOperand(2).isKill(); | |||
491 | OffsetInternal = MI->getOperand(2).isInternalRead(); | |||
492 | ||||
493 | if (MI->getOperand(3).getImm()) | |||
494 | // Thumb1 addressing mode doesn't support shift. | |||
495 | return false; | |||
496 | } | |||
497 | ||||
498 | unsigned OffsetImm = 0; | |||
499 | if (HasImmOffset) { | |||
500 | OffsetImm = MI->getOperand(2).getImm(); | |||
501 | unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; | |||
502 | ||||
503 | if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) | |||
504 | // Make sure the immediate field fits. | |||
505 | return false; | |||
506 | } | |||
507 | ||||
508 | // Add the 16-bit load / store instruction. | |||
509 | DebugLoc dl = MI->getDebugLoc(); | |||
510 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); | |||
511 | ||||
512 | // tSTMIA_UPD takes a defining register operand. We've already checked that | |||
513 | // the register is killed, so mark it as dead here. | |||
514 | if (Entry.WideOpc == ARM::t2STMIA) | |||
515 | MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); | |||
516 | ||||
517 | if (!isLdStMul) { | |||
518 | MIB.addOperand(MI->getOperand(0)); | |||
519 | MIB.addOperand(MI->getOperand(1)); | |||
520 | ||||
521 | if (HasImmOffset) | |||
522 | MIB.addImm(OffsetImm / Scale); | |||
523 | ||||
524 | assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
525 | ||||
526 | if (HasOffReg) | |||
527 | MIB.addReg(OffsetReg, getKillRegState(OffsetKill) | | |||
528 | getInternalReadRegState(OffsetInternal)); | |||
529 | } | |||
530 | ||||
531 | // Transfer the rest of operands. | |||
532 | for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) | |||
533 | MIB.addOperand(MI->getOperand(OpNum)); | |||
534 | ||||
535 | // Transfer memoperands. | |||
536 | MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); | |||
537 | ||||
538 | // Transfer MI flags. | |||
539 | MIB.setMIFlags(MI->getFlags()); | |||
540 | ||||
541 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
542 | ||||
543 | MBB.erase_instr(MI); | |||
544 | ++NumLdSts; | |||
545 | return true; | |||
546 | } | |||
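// Concrete example of this reduction: "ldr.w r0, [r1, #8]" (t2LDRi12, 32 bits)
// becomes "ldr r0, [r1, #8]" (tLDRi, 16 bits), since r0 and r1 are low
// registers and the offset 8 is a multiple of the scale 4 and still fits the
// 5-bit immediate field after scaling (8 / 4 = 2).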
547 | ||||
548 | bool | |||
549 | Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, | |||
550 | const ReduceEntry &Entry, | |||
551 | bool LiveCPSR, bool IsSelfLoop) { | |||
552 | unsigned Opc = MI->getOpcode(); | |||
553 | if (Opc == ARM::t2ADDri) { | |||
554 | // If the source register is SP, try to reduce to tADDrSPi, otherwise | |||
555 | // it's a normal reduce. | |||
556 | if (MI->getOperand(1).getReg() != ARM::SP) { | |||
557 | if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) | |||
558 | return true; | |||
559 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
560 | } | |||
561 | // Try to reduce to tADDrSPi. | |||
562 | unsigned Imm = MI->getOperand(2).getImm(); | |||
563 | // The immediate must be in range, the destination register must be a low | |||
564 | // reg, the predicate must be "always" and the condition flags must not | |||
565 | // be being set. | |||
566 | if (Imm & 3 || Imm > 1020) | |||
567 | return false; | |||
568 | if (!isARMLowRegister(MI->getOperand(0).getReg())) | |||
569 | return false; | |||
570 | if (MI->getOperand(3).getImm() != ARMCC::AL) | |||
571 | return false; | |||
572 | const MCInstrDesc &MCID = MI->getDesc(); | |||
573 | if (MCID.hasOptionalDef() && | |||
574 | MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) | |||
575 | return false; | |||
576 | ||||
577 | MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), | |||
578 | TII->get(ARM::tADDrSPi)) | |||
579 | .addOperand(MI->getOperand(0)) | |||
580 | .addOperand(MI->getOperand(1)) | |||
581 | .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. | |||
582 | AddDefaultPred(MIB); | |||
583 | ||||
584 | // Transfer MI flags. | |||
585 | MIB.setMIFlags(MI->getFlags()); | |||
586 | ||||
587 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
588 | ||||
589 | MBB.erase_instr(MI); | |||
590 | ++NumNarrows; | |||
591 | return true; | |||
592 | } | |||
593 | ||||
594 | if (Entry.LowRegs1 && !VerifyLowRegs(MI)) | |||
595 | return false; | |||
596 | ||||
597 | if (MI->mayLoadOrStore()) | |||
598 | return ReduceLoadStore(MBB, MI, Entry); | |||
599 | ||||
600 | switch (Opc) { | |||
601 | default: break; | |||
602 | case ARM::t2ADDSri: | |||
603 | case ARM::t2ADDSrr: { | |||
604 | unsigned PredReg = 0; | |||
605 | if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) { | |||
606 | switch (Opc) { | |||
607 | default: break; | |||
608 | case ARM::t2ADDSri: { | |||
609 | if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) | |||
610 | return true; | |||
611 | // fallthrough | |||
612 | } | |||
613 | case ARM::t2ADDSrr: | |||
614 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
615 | } | |||
616 | } | |||
617 | break; | |||
618 | } | |||
619 | case ARM::t2RSBri: | |||
620 | case ARM::t2RSBSri: | |||
621 | case ARM::t2SXTB: | |||
622 | case ARM::t2SXTH: | |||
623 | case ARM::t2UXTB: | |||
624 | case ARM::t2UXTH: | |||
625 | if (MI->getOperand(2).getImm() == 0) | |||
626 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
627 | break; | |||
628 | case ARM::t2MOVi16: | |||
629 | // Can convert only 'pure' immediate operands, not immediates obtained as | |||
630 | // globals' addresses. | |||
631 | if (MI->getOperand(1).isImm()) | |||
632 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
633 | break; | |||
634 | case ARM::t2CMPrr: { | |||
635 | // Try to reduce to the lo-reg only version first. Why there are two | |||
636 | // versions of the instruction is a mystery. | |||
637 | // It would be nice to just have two entries in the master table that | |||
638 | // are prioritized, but the table assumes a unique entry for each | |||
639 | // source insn opcode. So for now, we hack a local entry record to use. | |||
640 | static const ReduceEntry NarrowEntry = | |||
641 | { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; | |||
642 | if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop)) | |||
643 | return true; | |||
644 | return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
645 | } | |||
646 | } | |||
647 | return false; | |||
648 | } | |||
649 | ||||
650 | bool | |||
651 | Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, | |||
652 | const ReduceEntry &Entry, | |||
653 | bool LiveCPSR, bool IsSelfLoop) { | |||
654 | ||||
655 | if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) | |||
656 | return false; | |||
657 | ||||
658 | if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) | |||
659 | // Don't issue movs with shifter operand for some CPUs unless we | |||
660 | // are optimizing for size. | |||
661 | return false; | |||
662 | ||||
663 | unsigned Reg0 = MI->getOperand(0).getReg(); | |||
664 | unsigned Reg1 = MI->getOperand(1).getReg(); | |||
665 | // t2MUL is "special". The tied source operand is second, not first. | |||
666 | if (MI->getOpcode() == ARM::t2MUL) { | |||
667 | unsigned Reg2 = MI->getOperand(2).getReg(); | |||
668 | // Early exit if the regs aren't all low regs. | |||
669 | if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) | |||
670 | || !isARMLowRegister(Reg2)) | |||
671 | return false; | |||
672 | if (Reg0 != Reg2) { | |||
673 | // If the other operand also isn't the same as the destination, we | |||
674 | // can't reduce. | |||
675 | if (Reg1 != Reg0) | |||
676 | return false; | |||
677 | // Try to commute the operands to make it a 2-address instruction. | |||
678 | MachineInstr *CommutedMI = TII->commuteInstruction(MI); | |||
679 | if (!CommutedMI) | |||
680 | return false; | |||
681 | } | |||
682 | } else if (Reg0 != Reg1) { | |||
683 | // Try to commute the operands to make it a 2-address instruction. | |||
684 | unsigned CommOpIdx1 = 1; | |||
685 | unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; | |||
686 | if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || | |||
687 | MI->getOperand(CommOpIdx2).getReg() != Reg0) | |||
688 | return false; | |||
689 | MachineInstr *CommutedMI = | |||
690 | TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2); | |||
691 | if (!CommutedMI) | |||
692 | return false; | |||
693 | } | |||
694 | if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) | |||
695 | return false; | |||
696 | if (Entry.Imm2Limit) { | |||
697 | unsigned Imm = MI->getOperand(2).getImm(); | |||
698 | unsigned Limit = (1 << Entry.Imm2Limit) - 1; | |||
699 | if (Imm > Limit) | |||
700 | return false; | |||
701 | } else { | |||
702 | unsigned Reg2 = MI->getOperand(2).getReg(); | |||
703 | if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) | |||
704 | return false; | |||
705 | } | |||
706 | ||||
707 | // Check if it's possible / necessary to transfer the predicate. | |||
708 | const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); | |||
709 | unsigned PredReg = 0; | |||
710 | ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); | |||
711 | bool SkipPred = false; | |||
712 | if (Pred != ARMCC::AL) { | |||
713 | if (!NewMCID.isPredicable()) | |||
714 | // Can't transfer predicate, fail. | |||
715 | return false; | |||
716 | } else { | |||
717 | SkipPred = !NewMCID.isPredicable(); | |||
718 | } | |||
719 | ||||
720 | bool HasCC = false; | |||
721 | bool CCDead = false; | |||
722 | const MCInstrDesc &MCID = MI->getDesc(); | |||
723 | if (MCID.hasOptionalDef()) { | |||
724 | unsigned NumOps = MCID.getNumOperands(); | |||
725 | HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); | |||
726 | if (HasCC && MI->getOperand(NumOps-1).isDead()) | |||
727 | CCDead = true; | |||
728 | } | |||
729 | if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) | |||
730 | return false; | |||
731 | ||||
732 | // Avoid adding a false dependency on partial flag update by some 16-bit | |||
733 | // instructions which have the 's' bit set.
734 | if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && | |||
735 | canAddPseudoFlagDep(MI, IsSelfLoop)) | |||
736 | return false; | |||
737 | ||||
738 | // Add the 16-bit instruction. | |||
739 | DebugLoc dl = MI->getDebugLoc(); | |||
740 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); | |||
741 | MIB.addOperand(MI->getOperand(0)); | |||
742 | if (NewMCID.hasOptionalDef()) { | |||
743 | if (HasCC) | |||
744 | AddDefaultT1CC(MIB, CCDead); | |||
745 | else | |||
746 | AddNoT1CC(MIB); | |||
747 | } | |||
748 | ||||
749 | // Transfer the rest of operands. | |||
750 | unsigned NumOps = MCID.getNumOperands(); | |||
751 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { | |||
752 | if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) | |||
753 | continue; | |||
754 | if (SkipPred && MCID.OpInfo[i].isPredicate()) | |||
755 | continue; | |||
756 | MIB.addOperand(MI->getOperand(i)); | |||
757 | } | |||
758 | ||||
759 | // Transfer MI flags. | |||
760 | MIB.setMIFlags(MI->getFlags()); | |||
761 | ||||
762 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
763 | ||||
764 | MBB.erase_instr(MI); | |||
765 | ++Num2Addrs; | |||
766 | return true; | |||
767 | } | |||
768 | ||||
769 | bool | |||
770 | Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, | |||
771 | const ReduceEntry &Entry, | |||
772 | bool LiveCPSR, bool IsSelfLoop) { | |||
773 | if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) | |||
774 | return false; | |||
775 | ||||
776 | if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) | |||
777 | // Don't issue movs with shifter operand for some CPUs unless we | |||
778 | // are optimizing for size. | |||
779 | return false; | |||
780 | ||||
781 | unsigned Limit = ~0U; | |||
782 | if (Entry.Imm1Limit) | |||
783 | Limit = (1 << Entry.Imm1Limit) - 1; | |||
784 | ||||
785 | const MCInstrDesc &MCID = MI->getDesc(); | |||
786 | for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { | |||
787 | if (MCID.OpInfo[i].isPredicate()) | |||
788 | continue; | |||
789 | const MachineOperand &MO = MI->getOperand(i); | |||
790 | if (MO.isReg()) { | |||
791 | unsigned Reg = MO.getReg(); | |||
792 | if (!Reg || Reg == ARM::CPSR) | |||
793 | continue; | |||
794 | if (Entry.LowRegs1 && !isARMLowRegister(Reg)) | |||
795 | return false; | |||
796 | } else if (MO.isImm() && | |||
797 | !MCID.OpInfo[i].isPredicate()) { | |||
798 | if (((unsigned)MO.getImm()) > Limit) | |||
799 | return false; | |||
800 | } | |||
801 | } | |||
802 | ||||
803 | // Check if it's possible / necessary to transfer the predicate. | |||
804 | const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); | |||
805 | unsigned PredReg = 0; | |||
806 | ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); | |||
807 | bool SkipPred = false; | |||
808 | if (Pred != ARMCC::AL) { | |||
809 | if (!NewMCID.isPredicable()) | |||
810 | // Can't transfer predicate, fail. | |||
811 | return false; | |||
812 | } else { | |||
813 | SkipPred = !NewMCID.isPredicable(); | |||
814 | } | |||
815 | ||||
816 | bool HasCC = false; | |||
817 | bool CCDead = false; | |||
818 | if (MCID.hasOptionalDef()) { | |||
819 | unsigned NumOps = MCID.getNumOperands(); | |||
820 | HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); | |||
821 | if (HasCC && MI->getOperand(NumOps-1).isDead()) | |||
822 | CCDead = true; | |||
823 | } | |||
824 | if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) | |||
825 | return false; | |||
826 | ||||
827 | // Avoid adding a false dependency on partial flag update by some 16-bit | |||
828 | // instructions which have the 's' bit set.
829 | if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && | |||
830 | canAddPseudoFlagDep(MI, IsSelfLoop)) | |||
831 | return false; | |||
832 | ||||
833 | // Add the 16-bit instruction. | |||
834 | DebugLoc dl = MI->getDebugLoc(); | |||
835 | MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); | |||
836 | MIB.addOperand(MI->getOperand(0)); | |||
837 | if (NewMCID.hasOptionalDef()) { | |||
838 | if (HasCC) | |||
839 | AddDefaultT1CC(MIB, CCDead); | |||
840 | else | |||
841 | AddNoT1CC(MIB); | |||
842 | } | |||
843 | ||||
844 | // Transfer the rest of operands. | |||
845 | unsigned NumOps = MCID.getNumOperands(); | |||
846 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { | |||
847 | if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) | |||
848 | continue; | |||
849 | if ((MCID.getOpcode() == ARM::t2RSBSri || | |||
850 | MCID.getOpcode() == ARM::t2RSBri || | |||
851 | MCID.getOpcode() == ARM::t2SXTB || | |||
852 | MCID.getOpcode() == ARM::t2SXTH || | |||
853 | MCID.getOpcode() == ARM::t2UXTB || | |||
854 | MCID.getOpcode() == ARM::t2UXTH) && i == 2) | |||
855 | // Skip the zero immediate operand, it's now implicit. | |||
856 | continue; | |||
857 | bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); | |||
858 | if (SkipPred && isPred) | |||
859 | continue; | |||
860 | const MachineOperand &MO = MI->getOperand(i); | |||
861 | if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) | |||
862 | // Skip implicit def of CPSR. Either it's modeled as an optional | |||
863 | // def now or it's already an implicit def on the new instruction. | |||
864 | continue; | |||
865 | MIB.addOperand(MO); | |||
866 | } | |||
867 | if (!MCID.isPredicable() && NewMCID.isPredicable()) | |||
868 | AddDefaultPred(MIB); | |||
869 | ||||
870 | // Transfer MI flags. | |||
871 | MIB.setMIFlags(MI->getFlags()); | |||
872 | ||||
873 | DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
874 | ||||
875 | MBB.erase_instr(MI); | |||
876 | ++NumNarrows; | |||
877 | return true; | |||
878 | } | |||
879 | ||||
880 | static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { | |||
881 | bool HasDef = false; | |||
882 | for (const MachineOperand &MO : MI.operands()) { | |||
883 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) | |||
884 | continue; | |||
885 | if (MO.getReg() != ARM::CPSR) | |||
886 | continue; | |||
887 | ||||
888 | DefCPSR = true; | |||
889 | if (!MO.isDead()) | |||
890 | HasDef = true; | |||
891 | } | |||
892 | ||||
893 | return HasDef || LiveCPSR; | |||
894 | } | |||
895 | ||||
896 | static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { | |||
897 | for (const MachineOperand &MO : MI.operands()) { | |||
898 | if (!MO.isReg() || MO.isUndef() || MO.isDef()) | |||
899 | continue; | |||
900 | if (MO.getReg() != ARM::CPSR) | |||
901 | continue; | |||
902 | assert(LiveCPSR && "CPSR liveness tracking is wrong!");
903 | if (MO.isKill()) { | |||
904 | LiveCPSR = false; | |||
905 | break; | |||
906 | } | |||
907 | } | |||
908 | ||||
909 | return LiveCPSR; | |||
910 | } | |||
911 | ||||
912 | bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, | |||
913 | bool LiveCPSR, bool IsSelfLoop) { | |||
914 | unsigned Opcode = MI->getOpcode(); | |||
915 | DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); | |||
916 | if (OPI == ReduceOpcodeMap.end()) | |||
917 | return false; | |||
918 | const ReduceEntry &Entry = ReduceTable[OPI->second]; | |||
919 | ||||
920 | // Don't attempt normal reductions on "special" cases for now. | |||
921 | if (Entry.Special) | |||
922 | return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop); | |||
923 | ||||
924 | // Try to transform to a 16-bit two-address instruction. | |||
925 | if (Entry.NarrowOpc2 && | |||
926 | ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) | |||
927 | return true; | |||
928 | ||||
929 | // Try to transform to a 16-bit non-two-address instruction. | |||
930 | if (Entry.NarrowOpc1 && | |||
931 | ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) | |||
932 | return true; | |||
933 | ||||
934 | return false; | |||
935 | } | |||
936 | ||||
937 | bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { | |||
938 | bool Modified = false; | |||
939 | ||||
940 | // Yes, CPSR could be livein. | |||
941 | bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); | |||
942 | MachineInstr *BundleMI = nullptr; | |||
943 | ||||
944 | CPSRDef = nullptr; | |||
945 | HighLatencyCPSR = false; | |||
946 | ||||
947 | // Check predecessors for the latest CPSRDef. | |||
948 | for (auto *Pred : MBB.predecessors()) { | |||
949 | const MBBInfo &PInfo = BlockInfo[Pred->getNumber()]; | |||
950 | if (!PInfo.Visited) { | |||
951 | // Since blocks are visited in RPO, this must be a back-edge. | |||
952 | continue; | |||
953 | } | |||
954 | if (PInfo.HighLatencyCPSR) { | |||
955 | HighLatencyCPSR = true; | |||
956 | break; | |||
957 | } | |||
958 | } | |||
959 | ||||
960 | // If this BB loops back to itself, conservatively avoid narrowing the | |||
961 | // first instruction that does partial flag update. | |||
962 | bool IsSelfLoop = MBB.isSuccessor(&MBB); | |||
963 | MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); | |||
964 | MachineBasicBlock::instr_iterator NextMII; | |||
965 | for (; MII != E; MII = NextMII) { | |||
966 | NextMII = std::next(MII); | |||
967 | ||||
968 | MachineInstr *MI = &*MII; | |||
969 | if (MI->isBundle()) { | |||
970 | BundleMI = MI; | |||
971 | continue; | |||
972 | } | |||
973 | if (MI->isDebugValue()) | |||
974 | continue; | |||
975 | ||||
976 | LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); | |||
977 | ||||
978 | // Does NextMII belong to the same bundle as MI? | |||
979 | bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); | |||
980 | ||||
981 | if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) { | |||
982 | Modified = true; | |||
983 | MachineBasicBlock::instr_iterator I = std::prev(NextMII); | |||
984 | MI = &*I; | |||
985 | // Removing and reinserting the first instruction in a bundle will break | |||
986 | // up the bundle. Fix the bundling if it was broken. | |||
987 | if (NextInSameBundle && !NextMII->isBundledWithPred()) | |||
988 | NextMII->bundleWithPred(); | |||
989 | } | |||
990 | ||||
991 | if (!NextInSameBundle && MI->isInsideBundle()) { | |||
992 | // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill | |||
993 | // marker is only on the BUNDLE instruction. Process the BUNDLE | |||
994 | // instruction as we finish with the bundled instruction to work around | |||
995 | // the inconsistency. | |||
996 | if (BundleMI->killsRegister(ARM::CPSR))
    |     // <-- analyzer warning (column 11): Called C++ object pointer is null
997 | LiveCPSR = false;
998 | MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); | |||
999 | if (MO && !MO->isDead()) | |||
1000 | LiveCPSR = true; | |||
1001 | MO = BundleMI->findRegisterUseOperand(ARM::CPSR); | |||
1002 | if (MO && !MO->isKill()) | |||
1003 | LiveCPSR = true; | |||
1004 | } | |||
1005 | ||||
1006 | bool DefCPSR = false; | |||
1007 | LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); | |||
1008 | if (MI->isCall()) { | |||
1009 | // Calls don't really set CPSR. | |||
1010 | CPSRDef = nullptr; | |||
1011 | HighLatencyCPSR = false; | |||
1012 | IsSelfLoop = false; | |||
1013 | } else if (DefCPSR) { | |||
1014 | // This is the last CPSR defining instruction. | |||
1015 | CPSRDef = MI; | |||
1016 | HighLatencyCPSR = isHighLatencyCPSR(CPSRDef); | |||
1017 | IsSelfLoop = false; | |||
1018 | } | |||
1019 | } | |||
1020 | ||||
1021 | MBBInfo &Info = BlockInfo[MBB.getNumber()]; | |||
1022 | Info.HighLatencyCPSR = HighLatencyCPSR; | |||
1023 | Info.Visited = true; | |||
1024 | return Modified; | |||
1025 | } | |||
1026 | ||||
1027 | bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { | |||
1028 | if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
1029 | return false;
1030 | ||||
1031 | STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); | |||
1032 | if (STI->isThumb1Only() || STI->prefers32BitThumb()) | |||
1033 | return false; | |||
1034 | ||||
1035 | TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo()); | |||
1036 | ||||
1037 | // Optimizing / minimizing size? Minimizing size implies optimizing for size. | |||
1038 | OptimizeSize = MF.getFunction()->optForSize(); | |||
1039 | MinimizeSize = MF.getFunction()->optForMinSize(); | |||
1040 | ||||
1041 | BlockInfo.clear(); | |||
1042 | BlockInfo.resize(MF.getNumBlockIDs()); | |||
1043 | ||||
1044 | // Visit blocks in reverse post-order so LastCPSRDef is known for all | |||
1045 | // predecessors. | |||
1046 | ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); | |||
1047 | bool Modified = false; | |||
1048 | for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator | |||
1049 | I = RPOT.begin(), E = RPOT.end(); I != E; ++I) | |||
1050 | Modified |= ReduceMBB(**I); | |||
1051 | return Modified; | |||
1052 | } | |||
1053 | ||||
1054 | /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size | |||
1055 | /// reduction pass. | |||
1056 | FunctionPass *llvm::createThumb2SizeReductionPass( | |||
1057 | std::function<bool(const Function &)> Ftor) { | |||
1058 | return new Thumb2SizeReduce(Ftor); | |||
1059 | } |