LLVM 23.0.0git
Thumb2SizeReduction.cpp
Go to the documentation of this file.
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
10#include "ARMBaseInstrInfo.h"
11#include "ARMSubtarget.h"
13#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringRef.h"
28#include "llvm/IR/DebugLoc.h"
29#include "llvm/IR/Function.h"
30#include "llvm/MC/MCAsmInfo.h"
31#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/Support/Debug.h"
37#include <cassert>
38#include <cstdint>
39#include <functional>
40#include <iterator>
41#include <utility>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "thumb2-reduce-size"
46#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
47
// Counters for the three reduction strategies. Besides reporting, they are
// consulted by the -t2-reduce-limit* debugging cutoffs below.
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");

// Debugging aids: when set to N >= 0, the corresponding kind of reduction
// stops after N conversions (useful for bisecting miscompiles). -1 means
// unlimited.
static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);
58
59namespace {
60
  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow
  struct ReduceEntry {
    uint16_t WideOpc;      // Wide opcode
    uint16_t NarrowOpc1;   // Narrow opcode to transform to
    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                           // 1 - No cc field.
                           // 2 - Always set CPSR.
    unsigned PredCC2  : 2; // Same encoding as PredCC1, for the 2addr form.
    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
    unsigned Special  : 1; // Needs to be dealt with specially
    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
  };
79
  // One row per wide (32-bit) Thumb2 opcode that may be narrowed. The columns
  // mirror the ReduceEntry fields above; a 0 narrow opcode means that form
  // (plain or two-address) is not available for the wide opcode.
  static const ReduceEntry ReduceTable[] = {
  // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr, ARM::tEOR,    0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
  { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },

  { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA, ARM::tSTMIA_UPD, 0,          0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1,0 }
  };
155
156 class Thumb2SizeReduce : public MachineFunctionPass {
157 public:
158 static char ID;
159
160 const Thumb2InstrInfo *TII;
161 const ARMSubtarget *STI;
162
163 Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
164
165 bool runOnMachineFunction(MachineFunction &MF) override;
166
167 MachineFunctionProperties getRequiredProperties() const override {
168 return MachineFunctionProperties().setNoVRegs();
169 }
170
171 StringRef getPassName() const override {
173 }
174
175 private:
176 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
177 DenseMap<unsigned, unsigned> ReduceOpcodeMap;
178
179 bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
180
181 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
182 bool is2Addr, ARMCC::CondCodes Pred,
183 bool LiveCPSR, bool &HasCC, bool &CCDead);
184
185 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
186 const ReduceEntry &Entry);
187
188 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
189 const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
190
191 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
192 /// instruction.
193 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
194 const ReduceEntry &Entry, bool LiveCPSR,
195 bool IsSelfLoop);
196
197 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
198 /// non-two-address instruction.
199 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
200 const ReduceEntry &Entry, bool LiveCPSR,
201 bool IsSelfLoop);
202
203 /// ReduceMI - Attempt to reduce MI, return true on success.
204 bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
205 bool IsSelfLoop, bool SkipPrologueEpilogue);
206
207 /// ReduceMBB - Reduce width of instructions in the specified basic block.
208 bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
209
210 bool OptimizeSize;
211 bool MinimizeSize;
212
213 // Last instruction to define CPSR in the current block.
214 MachineInstr *CPSRDef;
215 // Was CPSR last defined by a high latency instruction?
216 // When CPSRDef is null, this refers to CPSR defs in predecessors.
217 bool HighLatencyCPSR;
218
219 struct MBBInfo {
220 // The flags leaving this block have high latency.
221 bool HighLatencyCPSR = false;
222 // Has this block been visited yet?
223 bool Visited = false;
224
225 MBBInfo() = default;
226 };
227
228 SmallVector<MBBInfo, 8> BlockInfo;
229
230 std::function<bool(const Function &)> PredicateFtor;
231 };
232
233 char Thumb2SizeReduce::ID = 0;
234
235} // end anonymous namespace
236
238 false)
239
240Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
241 : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
242 OptimizeSize = MinimizeSize = false;
243 for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
244 unsigned FromOpc = ReduceTable[i].WideOpc;
245 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
246 llvm_unreachable("Duplicated entries?");
247 }
248}
249
251 return is_contained(MCID.implicit_defs(), ARM::CPSR);
252}
253
254// Check for a likely high-latency flag def.
256 switch(Def->getOpcode()) {
257 case ARM::FMSTAT:
258 case ARM::tMUL:
259 return true;
260 }
261 return false;
262}
263
264/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
265/// the 's' 16-bit instruction partially update CPSR. Abort the
266/// transformation to avoid adding false dependency on last CPSR setting
267/// instruction which hurts the ability for out-of-order execution engine
268/// to do register renaming magic.
269/// This function checks if there is a read-of-write dependency between the
270/// last instruction that defines the CPSR and the current instruction. If there
271/// is, then there is no harm done since the instruction cannot be retired
272/// before the CPSR setting instruction anyway.
273/// Note, we are not doing full dependency analysis here for the sake of compile
274/// time. We're not looking for cases like:
275/// r0 = muls ...
276/// r1 = add.w r0, ...
277/// ...
278/// = mul.w r1
279/// In this case it would have been ok to narrow the mul.w to muls since there
280/// are indirect RAW dependency between the muls and the mul.w
281bool
282Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
283 // Disable the check for -Oz (aka OptimizeForSizeHarder).
284 if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
285 return false;
286
287 if (!CPSRDef)
288 // If this BB loops back to itself, conservatively avoid narrowing the
289 // first instruction that does partial flag update.
290 return HighLatencyCPSR || FirstInSelfLoop;
291
292 SmallSet<unsigned, 2> Defs;
293 for (const MachineOperand &MO : CPSRDef->operands()) {
294 if (!MO.isReg() || MO.isUndef() || MO.isUse())
295 continue;
296 Register Reg = MO.getReg();
297 if (Reg == 0 || Reg == ARM::CPSR)
298 continue;
299 Defs.insert(Reg);
300 }
301
302 for (const MachineOperand &MO : Use->operands()) {
303 if (!MO.isReg() || MO.isUndef() || MO.isDef())
304 continue;
305 Register Reg = MO.getReg();
306 if (Defs.count(Reg))
307 return false;
308 }
309
310 // If the current CPSR has high latency, try to avoid the false dependency.
311 if (HighLatencyCPSR)
312 return true;
313
314 // tMOVi8 usually doesn't start long dependency chains, and there are a lot
315 // of them, so always shrink them when CPSR doesn't have high latency.
316 if (Use->getOpcode() == ARM::t2MOVi ||
317 Use->getOpcode() == ARM::t2MOVi16)
318 return false;
319
320 // No read-after-write dependency. The narrowing will add false dependency.
321 return true;
322}
323
/// VerifyPredAndCC - Check that the predicate (Pred) and the instruction's
/// current CPSR-setting state (HasCC) are compatible with the narrow opcode
/// selected from Entry (two-address form when is2Addr). HasCC/CCDead are
/// updated in place to describe the CPSR operand the narrow instruction
/// should be built with. Returns false if the reduction must be abandoned.
bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  // PredCC == 0: the narrow form sets CPSR exactly when unpredicated.
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  // PredCC == 2: the narrow form always sets CPSR (e.g. compares).
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If old opcode does not implicitly define CPSR, then it's not ok since
    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}
366
368 unsigned Opc = MI->getOpcode();
369 bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
370 bool isLROk = (Opc == ARM::t2STMDB_UPD);
371 bool isSPOk = isPCOk || isLROk;
372 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
373 const MachineOperand &MO = MI->getOperand(i);
374 if (!MO.isReg() || MO.isImplicit())
375 continue;
376 Register Reg = MO.getReg();
377 if (Reg == 0 || Reg == ARM::CPSR)
378 continue;
379 if (isPCOk && Reg == ARM::PC)
380 continue;
381 if (isLROk && Reg == ARM::LR)
382 continue;
383 if (Reg == ARM::SP) {
384 if (isSPOk)
385 continue;
386 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
387 // Special case for these ldr / str with sp as base register.
388 continue;
389 }
390 if (!isARMLowRegister(Reg))
391 return false;
392 }
393 return true;
394}
395
396bool
397Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
398 const ReduceEntry &Entry) {
399 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
400 return false;
401
402 unsigned Scale = 1;
403 bool HasImmOffset = false;
404 bool HasShift = false;
405 bool HasOffReg = true;
406 bool isLdStMul = false;
407 unsigned Opc = Entry.NarrowOpc1;
408 unsigned OpNum = 3; // First 'rest' of operands.
409 uint8_t ImmLimit = Entry.Imm1Limit;
410
411 switch (Entry.WideOpc) {
412 default:
413 llvm_unreachable("Unexpected Thumb2 load / store opcode!");
414 case ARM::t2LDRi12:
415 case ARM::t2STRi12:
416 if (MI->getOperand(1).getReg() == ARM::SP) {
417 Opc = Entry.NarrowOpc2;
418 ImmLimit = Entry.Imm2Limit;
419 }
420
421 Scale = 4;
422 HasImmOffset = true;
423 HasOffReg = false;
424 break;
425 case ARM::t2LDRBi12:
426 case ARM::t2STRBi12:
427 HasImmOffset = true;
428 HasOffReg = false;
429 break;
430 case ARM::t2LDRHi12:
431 case ARM::t2STRHi12:
432 Scale = 2;
433 HasImmOffset = true;
434 HasOffReg = false;
435 break;
436 case ARM::t2LDRs:
437 case ARM::t2LDRBs:
438 case ARM::t2LDRHs:
439 case ARM::t2LDRSBs:
440 case ARM::t2LDRSHs:
441 case ARM::t2STRs:
442 case ARM::t2STRBs:
443 case ARM::t2STRHs:
444 HasShift = true;
445 OpNum = 4;
446 break;
447 case ARM::t2LDR_POST:
448 case ARM::t2STR_POST: {
449 if (!MinimizeSize)
450 return false;
451
452 if (!MI->hasOneMemOperand() ||
453 (*MI->memoperands_begin())->getAlign() < Align(4))
454 return false;
455
456 // We're creating a completely different type of load/store - LDM from LDR.
457 // For this reason we can't reuse the logic at the end of this function; we
458 // have to implement the MI building here.
459 bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
460 Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
461 Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
462 unsigned Offset = MI->getOperand(3).getImm();
463 unsigned PredImm = MI->getOperand(4).getImm();
464 Register PredReg = MI->getOperand(5).getReg();
467
468 if (Offset != 4)
469 return false;
470
471 // Add the 16-bit load / store instruction.
472 DebugLoc dl = MI->getDebugLoc();
473 auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
474 .addReg(Rn, RegState::Define)
475 .addReg(Rn)
476 .addImm(PredImm)
477 .addReg(PredReg)
478 .addReg(Rt, getDefRegState(!IsStore));
479
480 // Transfer memoperands.
481 MIB.setMemRefs(MI->memoperands());
482
483 // Transfer MI flags.
484 MIB.setMIFlags(MI->getFlags());
485
486 // Kill the old instruction.
487 MI->eraseFromBundle();
488 ++NumLdSts;
489 return true;
490 }
491 case ARM::t2LDMIA: {
492 Register BaseReg = MI->getOperand(0).getReg();
493 assert(isARMLowRegister(BaseReg));
494
495 // For the non-writeback version (this one), the base register must be
496 // one of the registers being loaded.
497 bool isOK = false;
498 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
499 if (MO.getReg() == BaseReg) {
500 isOK = true;
501 break;
502 }
503 }
504
505 if (!isOK)
506 return false;
507
508 OpNum = 0;
509 isLdStMul = true;
510 break;
511 }
512 case ARM::t2STMIA: {
513 // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
514 // if the base register is killed, as then it doesn't matter what its value
515 // is after the instruction.
516 if (!MI->getOperand(0).isKill())
517 return false;
518
519 // If the base register is in the register list and isn't the lowest
520 // numbered register (i.e. it's in operand 4 onwards) then with writeback
521 // the stored value is unknown, so we can't convert to tSTMIA_UPD.
522 Register BaseReg = MI->getOperand(0).getReg();
523 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
524 if (MO.getReg() == BaseReg)
525 return false;
526
527 break;
528 }
529 case ARM::t2LDMIA_RET: {
530 Register BaseReg = MI->getOperand(1).getReg();
531 if (BaseReg != ARM::SP)
532 return false;
533 Opc = Entry.NarrowOpc2; // tPOP_RET
534 OpNum = 2;
535 isLdStMul = true;
536 break;
537 }
538 case ARM::t2LDMIA_UPD:
539 case ARM::t2STMIA_UPD:
540 case ARM::t2STMDB_UPD: {
541 OpNum = 0;
542
543 Register BaseReg = MI->getOperand(1).getReg();
544 if (BaseReg == ARM::SP &&
545 (Entry.WideOpc == ARM::t2LDMIA_UPD ||
546 Entry.WideOpc == ARM::t2STMDB_UPD)) {
547 Opc = Entry.NarrowOpc2; // tPOP or tPUSH
548 OpNum = 2;
549 } else if (!isARMLowRegister(BaseReg) ||
550 (Entry.WideOpc != ARM::t2LDMIA_UPD &&
551 Entry.WideOpc != ARM::t2STMIA_UPD)) {
552 return false;
553 }
554
555 isLdStMul = true;
556 break;
557 }
558 }
559
560 unsigned OffsetReg = 0;
561 bool OffsetKill = false;
562 bool OffsetInternal = false;
563 if (HasShift) {
564 OffsetReg = MI->getOperand(2).getReg();
565 OffsetKill = MI->getOperand(2).isKill();
566 OffsetInternal = MI->getOperand(2).isInternalRead();
567
568 if (MI->getOperand(3).getImm())
569 // Thumb1 addressing mode doesn't support shift.
570 return false;
571 }
572
573 unsigned OffsetImm = 0;
574 if (HasImmOffset) {
575 OffsetImm = MI->getOperand(2).getImm();
576 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
577
578 if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
579 // Make sure the immediate field fits.
580 return false;
581 }
582
583 // Add the 16-bit load / store instruction.
584 DebugLoc dl = MI->getDebugLoc();
585 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
586
587 // tSTMIA_UPD takes a defining register operand. We've already checked that
588 // the register is killed, so mark it as dead here.
589 if (Entry.WideOpc == ARM::t2STMIA)
590 MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
591
592 if (!isLdStMul) {
593 MIB.add(MI->getOperand(0));
594 MIB.add(MI->getOperand(1));
595
596 if (HasImmOffset)
597 MIB.addImm(OffsetImm / Scale);
598
599 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
600
601 if (HasOffReg)
602 MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
603 getInternalReadRegState(OffsetInternal));
604 }
605
606 // Transfer the rest of operands.
607 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
608 MIB.add(MO);
609
610 // Transfer memoperands.
611 MIB.setMemRefs(MI->memoperands());
612
613 // Transfer MI flags.
614 MIB.setMIFlags(MI->getFlags());
615
616 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
617 << " to 16-bit: " << *MIB);
618
620 ++NumLdSts;
621 return true;
622}
623
624bool
625Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
626 const ReduceEntry &Entry,
627 bool LiveCPSR, bool IsSelfLoop) {
628 unsigned Opc = MI->getOpcode();
629 if (Opc == ARM::t2ADDri) {
630 // If the source register is SP, try to reduce to tADDrSPi, otherwise
631 // it's a normal reduce.
632 if (MI->getOperand(1).getReg() != ARM::SP) {
633 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
634 return true;
635 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
636 }
637 // Try to reduce to tADDrSPi.
638 unsigned Imm = MI->getOperand(2).getImm();
639 // The immediate must be in range, the destination register must be a low
640 // reg, the predicate must be "always" and the condition flags must not
641 // be being set.
642 if (Imm & 3 || Imm > 1020)
643 return false;
644 if (!isARMLowRegister(MI->getOperand(0).getReg()))
645 return false;
646 if (MI->getOperand(3).getImm() != ARMCC::AL)
647 return false;
648 const MCInstrDesc &MCID = MI->getDesc();
649 if (MCID.hasOptionalDef() &&
650 MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
651 return false;
652
653 MachineInstrBuilder MIB =
654 BuildMI(MBB, MI, MI->getDebugLoc(),
655 TII->get(ARM::tADDrSPi))
656 .add(MI->getOperand(0))
657 .add(MI->getOperand(1))
658 .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
660
661 // Transfer MI flags.
662 MIB.setMIFlags(MI->getFlags());
663
664 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
665 << " to 16-bit: " << *MIB);
666
668 ++NumNarrows;
669 return true;
670 }
671
672 if (Entry.LowRegs1 && !VerifyLowRegs(MI))
673 return false;
674
675 if (MI->mayLoadOrStore())
676 return ReduceLoadStore(MBB, MI, Entry);
677
678 switch (Opc) {
679 default: break;
680 case ARM::t2ADDSri:
681 case ARM::t2ADDSrr: {
682 Register PredReg;
683 if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
684 switch (Opc) {
685 default: break;
686 case ARM::t2ADDSri:
687 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
688 return true;
689 [[fallthrough]];
690 case ARM::t2ADDSrr:
691 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
692 }
693 }
694 break;
695 }
696 case ARM::t2RSBri:
697 case ARM::t2RSBSri:
698 case ARM::t2SXTB:
699 case ARM::t2SXTH:
700 case ARM::t2UXTB:
701 case ARM::t2UXTH:
702 if (MI->getOperand(2).getImm() == 0)
703 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
704 break;
705 case ARM::t2MOVi16:
706 // Can convert only 'pure' immediate operands, not immediates obtained as
707 // globals' addresses.
708 if (MI->getOperand(1).isImm())
709 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
710 break;
711 case ARM::t2CMPrr: {
712 // Try to reduce to the lo-reg only version first. Why there are two
713 // versions of the instruction is a mystery.
714 // It would be nice to just have two entries in the main table that
715 // are prioritized, but the table assumes a unique entry for each
716 // source insn opcode. So for now, we hack a local entry record to use.
717 static const ReduceEntry NarrowEntry =
718 { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
719 if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
720 return true;
721 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
722 }
723 case ARM::t2TEQrr: {
724 Register PredReg;
725 // Can only convert to eors if we're not in an IT block.
726 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
727 break;
728 // TODO if Operand 0 is not killed but Operand 1 is, then we could write
729 // to Op1 instead.
730 if (MI->getOperand(0).isKill())
731 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
732 }
733 }
734 return false;
735}
736
737bool
738Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
739 const ReduceEntry &Entry,
740 bool LiveCPSR, bool IsSelfLoop) {
741 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
742 return false;
743
744 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
745 // Don't issue movs with shifter operand for some CPUs unless we
746 // are optimizing for size.
747 return false;
748
749 Register Reg0 = MI->getOperand(0).getReg();
750 Register Reg1 = MI->getOperand(1).getReg();
751 // t2MUL is "special". The tied source operand is second, not first.
752 if (MI->getOpcode() == ARM::t2MUL) {
753 // MULS can be slower than MUL
754 if (!MinimizeSize && STI->avoidMULS())
755 return false;
756 Register Reg2 = MI->getOperand(2).getReg();
757 // Early exit if the regs aren't all low regs.
758 if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
759 || !isARMLowRegister(Reg2))
760 return false;
761 if (Reg0 != Reg2) {
762 // If the other operand also isn't the same as the destination, we
763 // can't reduce.
764 if (Reg1 != Reg0)
765 return false;
766 // Try to commute the operands to make it a 2-address instruction.
767 MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
768 if (!CommutedMI)
769 return false;
770 }
771 } else if (Reg0 != Reg1) {
772 // Try to commute the operands to make it a 2-address instruction.
773 unsigned CommOpIdx1 = 1;
774 unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
775 if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
776 MI->getOperand(CommOpIdx2).getReg() != Reg0)
777 return false;
778 MachineInstr *CommutedMI =
779 TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
780 if (!CommutedMI)
781 return false;
782 }
783 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
784 return false;
785 if (Entry.Imm2Limit) {
786 unsigned Imm = MI->getOperand(2).getImm();
787 unsigned Limit = (1 << Entry.Imm2Limit) - 1;
788 if (Imm > Limit)
789 return false;
790 } else {
791 Register Reg2 = MI->getOperand(2).getReg();
792 if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
793 return false;
794 }
795
796 // Check if it's possible / necessary to transfer the predicate.
797 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
798 Register PredReg;
799 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
800 bool SkipPred = false;
801 if (Pred != ARMCC::AL) {
802 if (!NewMCID.isPredicable())
803 // Can't transfer predicate, fail.
804 return false;
805 } else {
806 SkipPred = !NewMCID.isPredicable();
807 }
808
809 bool HasCC = false;
810 bool CCDead = false;
811 const MCInstrDesc &MCID = MI->getDesc();
812 if (MCID.hasOptionalDef()) {
813 unsigned NumOps = MCID.getNumOperands();
814 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
815 if (HasCC && MI->getOperand(NumOps-1).isDead())
816 CCDead = true;
817 }
818 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
819 return false;
820
821 // Avoid adding a false dependency on partial flag update by some 16-bit
822 // instructions which has the 's' bit set.
823 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
824 canAddPseudoFlagDep(MI, IsSelfLoop))
825 return false;
826
827 // Add the 16-bit instruction.
828 DebugLoc dl = MI->getDebugLoc();
829 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
830 MIB.add(MI->getOperand(0));
831 if (NewMCID.hasOptionalDef())
832 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
833
834 // Transfer the rest of operands.
835 unsigned NumOps = MCID.getNumOperands();
836 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
837 if (i < NumOps && MCID.operands()[i].isOptionalDef())
838 continue;
839 if (SkipPred && MCID.operands()[i].isPredicate())
840 continue;
841 MIB.add(MI->getOperand(i));
842 }
843
844 // Transfer MI flags.
845 MIB.setMIFlags(MI->getFlags());
846
847 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
848 << " to 16-bit: " << *MIB);
849
851 ++Num2Addrs;
852 return true;
853}
854
855bool
856Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
857 const ReduceEntry &Entry,
858 bool LiveCPSR, bool IsSelfLoop) {
859 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
860 return false;
861
862 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
863 // Don't issue movs with shifter operand for some CPUs unless we
864 // are optimizing for size.
865 return false;
866
867 unsigned Limit = ~0U;
868 if (Entry.Imm1Limit)
869 Limit = (1 << Entry.Imm1Limit) - 1;
870
871 const MCInstrDesc &MCID = MI->getDesc();
872 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
873 if (MCID.operands()[i].isPredicate())
874 continue;
875 const MachineOperand &MO = MI->getOperand(i);
876 if (MO.isReg()) {
877 Register Reg = MO.getReg();
878 if (!Reg || Reg == ARM::CPSR)
879 continue;
880 if (Entry.LowRegs1 && !isARMLowRegister(Reg))
881 return false;
882 } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
883 if (((unsigned)MO.getImm()) > Limit)
884 return false;
885 }
886 }
887
888 // Check if it's possible / necessary to transfer the predicate.
889 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
890 Register PredReg;
891 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
892 bool SkipPred = false;
893 if (Pred != ARMCC::AL) {
894 if (!NewMCID.isPredicable())
895 // Can't transfer predicate, fail.
896 return false;
897 } else {
898 SkipPred = !NewMCID.isPredicable();
899 }
900
901 bool HasCC = false;
902 bool CCDead = false;
903 if (MCID.hasOptionalDef()) {
904 unsigned NumOps = MCID.getNumOperands();
905 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
906 if (HasCC && MI->getOperand(NumOps-1).isDead())
907 CCDead = true;
908 }
909 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
910 return false;
911
912 // Avoid adding a false dependency on partial flag update by some 16-bit
913 // instructions which has the 's' bit set.
914 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
915 canAddPseudoFlagDep(MI, IsSelfLoop))
916 return false;
917
918 // Add the 16-bit instruction.
919 DebugLoc dl = MI->getDebugLoc();
920 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
921
922 // TEQ is special in that it doesn't define a register but we're converting
923 // it into an EOR which does. So add the first operand as a def and then
924 // again as a use.
925 if (MCID.getOpcode() == ARM::t2TEQrr) {
926 MIB.add(MI->getOperand(0));
927 MIB->getOperand(0).setIsKill(false);
928 MIB->getOperand(0).setIsDef(true);
929 MIB->getOperand(0).setIsDead(true);
930
931 if (NewMCID.hasOptionalDef())
932 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
933 MIB.add(MI->getOperand(0));
934 } else {
935 MIB.add(MI->getOperand(0));
936 if (NewMCID.hasOptionalDef())
937 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
938 }
939
940 // Transfer the rest of operands.
941 unsigned NumOps = MCID.getNumOperands();
942 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
943 if (i < NumOps && MCID.operands()[i].isOptionalDef())
944 continue;
945 if ((MCID.getOpcode() == ARM::t2RSBSri ||
946 MCID.getOpcode() == ARM::t2RSBri ||
947 MCID.getOpcode() == ARM::t2SXTB ||
948 MCID.getOpcode() == ARM::t2SXTH ||
949 MCID.getOpcode() == ARM::t2UXTB ||
950 MCID.getOpcode() == ARM::t2UXTH) && i == 2)
951 // Skip the zero immediate operand, it's now implicit.
952 continue;
953 bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
954 if (SkipPred && isPred)
955 continue;
956 const MachineOperand &MO = MI->getOperand(i);
957 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
958 // Skip implicit def of CPSR. Either it's modeled as an optional
959 // def now or it's already an implicit def on the new instruction.
960 continue;
961 MIB.add(MO);
962 }
963 if (!MCID.isPredicable() && NewMCID.isPredicable())
964 MIB.add(predOps(ARMCC::AL));
965
966 // Transfer MI flags.
967 MIB.setMIFlags(MI->getFlags());
968
969 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
970 << " to 16-bit: " << *MIB);
971
973 ++NumNarrows;
974 return true;
975}
976
977static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
978 bool HasDef = false;
979 for (const MachineOperand &MO : MI.operands()) {
980 if (!MO.isReg() || MO.isUndef() || MO.isUse())
981 continue;
982 if (MO.getReg() != ARM::CPSR)
983 continue;
984
985 DefCPSR = true;
986 if (!MO.isDead())
987 HasDef = true;
988 }
989
990 return HasDef || LiveCPSR;
991}
992
993static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
994 for (const MachineOperand &MO : MI.operands()) {
995 if (!MO.isReg() || MO.isUndef() || MO.isDef())
996 continue;
997 if (MO.getReg() != ARM::CPSR)
998 continue;
999 assert(LiveCPSR && "CPSR liveness tracking is wrong!");
1000 if (MO.isKill()) {
1001 LiveCPSR = false;
1002 break;
1003 }
1004 }
1005
1006 return LiveCPSR;
1007}
1008
1009bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
1010 bool LiveCPSR, bool IsSelfLoop,
1011 bool SkipPrologueEpilogue) {
1012 unsigned Opcode = MI->getOpcode();
1013 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
1014 if (OPI == ReduceOpcodeMap.end())
1015 return false;
1016 if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
1017 MI->getFlag(MachineInstr::FrameDestroy)))
1018 return false;
1019 const ReduceEntry &Entry = ReduceTable[OPI->second];
1020
1021 // Don't attempt normal reductions on "special" cases for now.
1022 if (Entry.Special)
1023 return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
1024
1025 // Try to transform to a 16-bit two-address instruction.
1026 if (Entry.NarrowOpc2 &&
1027 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1028 return true;
1029
1030 // Try to transform to a 16-bit non-two-address instruction.
1031 if (Entry.NarrowOpc1 &&
1032 ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1033 return true;
1034
1035 return false;
1036}
1037
/// ReduceMBB - Attempt to reduce every eligible 32-bit instruction in MBB to
/// a 16-bit encoding, tracking CPSR liveness (including through bundles) as
/// it walks the block. Records the block's HighLatencyCPSR state in
/// BlockInfo and returns true if anything was rewritten.
1038bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
1039 bool SkipPrologueEpilogue) {
1040 bool Modified = false;
1041
1042 // Yes, CPSR could be livein.
1043 bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
1044 MachineInstr *BundleMI = nullptr;
1045
1046 CPSRDef = nullptr;
1047 HighLatencyCPSR = false;
1048
1049 // Check predecessors for the latest CPSRDef.
1050 for (auto *Pred : MBB.predecessors()) {
1051 const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
1052 if (!PInfo.Visited) {
1053 // Since blocks are visited in RPO, this must be a back-edge.
1054 continue;
1055 }
1056 if (PInfo.HighLatencyCPSR) {
1057 HighLatencyCPSR = true;
1058 break;
1059 }
1060 }
1061
1062 // If this BB loops back to itself, conservatively avoid narrowing the
1063 // first instruction that does partial flag update.
1064 bool IsSelfLoop = MBB.isSuccessor(&MBB);
 // NOTE(review): the declarations of MII, NextMII and E (instruction
 // iterators over MBB) sit on lines not visible in this excerpt.
 // NextMII is captured before reducing MI so the loop survives MI being
 // replaced/erased by ReduceMI.
1067 for (; MII != E; MII = NextMII) {
1068 NextMII = std::next(MII);
1069
1070 MachineInstr *MI = &*MII;
1071 if (MI->isBundle()) {
 // Remember the BUNDLE header; its CPSR markers are consulted when
 // the last instruction of the bundle has been processed (see below).
1072 BundleMI = MI;
1073 continue;
1074 }
1075 if (MI->isDebugInstr())
1076 continue;
1077
1078 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
1079
1080 // Does NextMII belong to the same bundle as MI?
1081 bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
1082
1083 if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
1084 Modified = true;
 // MI may have been replaced; re-point MI at the instruction that now
 // precedes NextMII (the newly inserted 16-bit instruction).
1085 MachineBasicBlock::instr_iterator I = std::prev(NextMII);
1086 MI = &*I;
1087 // Removing and reinserting the first instruction in a bundle will break
1088 // up the bundle. Fix the bundling if it was broken.
1089 if (NextInSameBundle && !NextMII->isBundledWithPred())
1090 NextMII->bundleWithPred();
1091 }
1092
1093 if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
1094 // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
1095 // marker is only on the BUNDLE instruction. Process the BUNDLE
1096 // instruction as we finish with the bundled instruction to work around
1097 // the inconsistency.
1098 if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
1099 LiveCPSR = false;
1100 MachineOperand *MO =
1101 BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
1102 if (MO && !MO->isDead())
1103 LiveCPSR = true;
1104 MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
1105 if (MO && !MO->isKill())
1106 LiveCPSR = true;
1107 }
1108
 // Fold MI's own CPSR definitions into the liveness state.
1109 bool DefCPSR = false;
1110 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
1111 if (MI->isCall()) {
1112 // Calls don't really set CPSR.
1113 CPSRDef = nullptr;
1114 HighLatencyCPSR = false;
1115 IsSelfLoop = false;
1116 } else if (DefCPSR) {
1117 // This is the last CPSR defining instruction.
1118 CPSRDef = MI;
1119 HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
1120 IsSelfLoop = false;
1121 }
1122 }
1123
 // Publish this block's state so RPO successors can consult it.
1124 MBBInfo &Info = BlockInfo[MBB.getNumber()];
1125 Info.HighLatencyCPSR = HighLatencyCPSR;
1126 Info.Visited = true;
1127 return Modified;
1128}
1129
/// runOnMachineFunction - Pass entry point: run Thumb2 size reduction over
/// every basic block of MF in reverse post-order. Returns true if the
/// function was modified.
1130bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 // If a filtering predicate was supplied, skip functions it rejects.
1131 if (PredicateFtor && !PredicateFtor(MF.getFunction()))
1132 return false;
1133
 // Nothing to do for Thumb1-only targets, or when the subtarget prefers
 // 32-bit Thumb encodings.
1134 STI = &MF.getSubtarget<ARMSubtarget>();
1135 if (STI->isThumb1Only() || STI->prefers32BitThumb())
1136 return false;
1137
1138 TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
1139
1140 // Optimizing / minimizing size? Minimizing size implies optimizing for size.
1141 OptimizeSize = MF.getFunction().hasOptSize();
1142 MinimizeSize = STI->hasMinSize();
1143
 // Reset per-function block state before the RPO walk.
1144 BlockInfo.clear();
1145 BlockInfo.resize(MF.getNumBlockIDs());
1146
1147 // Visit blocks in reverse post-order so LastCPSRDef is known for all
1148 // predecessors.
1149 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
1150 bool Modified = false;
 // When targeting Windows CFI, prologue/epilogue (FrameSetup/FrameDestroy)
 // instructions are left untouched (see ReduceMI).
 // NOTE(review): the right-hand side of NeedsWinCFI continues on a line not
 // visible in this excerpt.
1151 bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1153 for (MachineBasicBlock *MBB : RPOT)
1154 Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
1155 return Modified;
1156}
1157
1158/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1159/// reduction pass.
/// If \p Ftor is non-null, the pass only processes functions for which it
/// returns true (checked in runOnMachineFunction).
/// NOTE(review): the opening line of the signature is not visible in this
/// excerpt.
1161 std::function<bool(const Function &)> Ftor) {
1162 return new Thumb2SizeReduce(std::move(Ftor));
1163}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
dot regions Print regions of function to dot true view regions View regions of function(with no function bodies)"
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static cl::opt< int > ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimit2Addr("t2-reduce-limit2", cl::init(-1), cl::Hidden)
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID)
static bool isHighLatencyCPSR(MachineInstr *Def)
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR)
static bool VerifyLowRegs(MachineInstr *MI)
#define THUMB2_SIZE_REDUCE_NAME
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR)
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool hasMinSize() const
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
bool usesWindowsCFI() const
Definition MCAsmInfo.h:665
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
bool isPredicable() const
Return true if this instruction has a predicate operand that controls execution.
unsigned getOpcode() const
Return the opcode number for this descriptor.
instr_iterator erase_instr(MachineInstr *I)
Remove an instruction from the instruction list and delete it.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Instructions::iterator instr_iterator
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
mop_range operands()
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
static const unsigned CommuteAnyOperandIndex
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
constexpr RegState getInternalReadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
FunctionPass * createThumb2SizeReductionPass(std::function< bool(const Function &)> Ftor=nullptr)
createThumb2SizeReductionPass - Returns an instance of the Thumb2 size reduction pass.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870