LLVM 20.0.0git
Thumb2SizeReduction.cpp
Go to the documentation of this file.
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
10#include "ARMBaseInstrInfo.h"
11#include "ARMSubtarget.h"
13#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringRef.h"
28#include "llvm/IR/DebugLoc.h"
29#include "llvm/IR/Function.h"
30#include "llvm/MC/MCAsmInfo.h"
31#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/Support/Debug.h"
37#include <cassert>
38#include <cstdint>
39#include <functional>
40#include <iterator>
41#include <utility>
42
43using namespace llvm;
44
45#define DEBUG_TYPE "thumb2-reduce-size"
46#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
47
// Pass-wide statistics, reported under -stats.
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");

// Debugging aids: hidden flags that cap how many reductions of each kind the
// pass may perform (-1 means unlimited). Handy for bisecting a miscompile
// down to a single offending transformation.
static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);
58
59namespace {
60
  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow. One row describes how a single wide (32-bit Thumb2)
  /// opcode may be rewritten as a narrow (16-bit Thumb1) instruction, in both
  /// its plain and two-address forms.
  struct ReduceEntry {
    uint16_t WideOpc;      // Wide opcode
    uint16_t NarrowOpc1;   // Narrow opcode to transform to
    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
    uint8_t Imm1Limit;     // Limit of immediate field (bits)
    uint8_t Imm2Limit;     // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
    unsigned PredCC1 : 2;  // 0 - If predicated, cc is on and vice versa.
                           // 1 - No cc field.
                           // 2 - Always set CPSR.
    unsigned PredCC2 : 2;  // Same encoding as PredCC1, for the 2addr form.
    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
    unsigned Special : 1;  // Needs to be dealt with specially
    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
  };
79
  // The one-and-only mapping table; rows must have unique WideOpc values
  // (the constructor builds an opcode -> row-index DenseMap and asserts
  // uniqueness). Column order matches the ReduceEntry fields above.
  static const ReduceEntry ReduceTable[] = {
  // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0,0 },
  //FIXME: Disable CMN, as CCodes are backwards from compare expectations
  //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr, ARM::tEOR,    0,             0,   0,    1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
  { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0,         0,   0,    1,   0,  0,0, 0,1,0 },

  { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA, ARM::tSTMIA_UPD, 0,          0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1,0 }
  };
157
158 class Thumb2SizeReduce : public MachineFunctionPass {
159 public:
160 static char ID;
161
162 const Thumb2InstrInfo *TII;
163 const ARMSubtarget *STI;
164
165 Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
166
167 bool runOnMachineFunction(MachineFunction &MF) override;
168
171 MachineFunctionProperties::Property::NoVRegs);
172 }
173
174 StringRef getPassName() const override {
176 }
177
178 private:
179 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
180 DenseMap<unsigned, unsigned> ReduceOpcodeMap;
181
182 bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
183
184 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
185 bool is2Addr, ARMCC::CondCodes Pred,
186 bool LiveCPSR, bool &HasCC, bool &CCDead);
187
188 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
189 const ReduceEntry &Entry);
190
191 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
192 const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
193
194 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
195 /// instruction.
196 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
197 const ReduceEntry &Entry, bool LiveCPSR,
198 bool IsSelfLoop);
199
200 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
201 /// non-two-address instruction.
202 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
203 const ReduceEntry &Entry, bool LiveCPSR,
204 bool IsSelfLoop);
205
206 /// ReduceMI - Attempt to reduce MI, return true on success.
207 bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
208 bool IsSelfLoop, bool SkipPrologueEpilogue);
209
210 /// ReduceMBB - Reduce width of instructions in the specified basic block.
211 bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
212
213 bool OptimizeSize;
214 bool MinimizeSize;
215
216 // Last instruction to define CPSR in the current block.
217 MachineInstr *CPSRDef;
218 // Was CPSR last defined by a high latency instruction?
219 // When CPSRDef is null, this refers to CPSR defs in predecessors.
220 bool HighLatencyCPSR;
221
222 struct MBBInfo {
223 // The flags leaving this block have high latency.
224 bool HighLatencyCPSR = false;
225 // Has this block been visited yet?
226 bool Visited = false;
227
228 MBBInfo() = default;
229 };
230
231 SmallVector<MBBInfo, 8> BlockInfo;
232
233 std::function<bool(const Function &)> PredicateFtor;
234 };
235
236 char Thumb2SizeReduce::ID = 0;
237
238} // end anonymous namespace
239
241 false)
242
243Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
244 : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
245 OptimizeSize = MinimizeSize = false;
246 for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
247 unsigned FromOpc = ReduceTable[i].WideOpc;
248 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
249 llvm_unreachable("Duplicated entries?");
250 }
251}
252
253static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
254 return is_contained(MCID.implicit_defs(), ARM::CPSR);
255}
256
257// Check for a likely high-latency flag def.
259 switch(Def->getOpcode()) {
260 case ARM::FMSTAT:
261 case ARM::tMUL:
262 return true;
263 }
264 return false;
265}
266
267/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
268/// the 's' 16-bit instruction partially update CPSR. Abort the
269/// transformation to avoid adding false dependency on last CPSR setting
270/// instruction which hurts the ability for out-of-order execution engine
271/// to do register renaming magic.
272/// This function checks if there is a read-of-write dependency between the
273/// last instruction that defines the CPSR and the current instruction. If there
274/// is, then there is no harm done since the instruction cannot be retired
275/// before the CPSR setting instruction anyway.
276/// Note, we are not doing full dependency analysis here for the sake of compile
277/// time. We're not looking for cases like:
278/// r0 = muls ...
279/// r1 = add.w r0, ...
280/// ...
281/// = mul.w r1
282/// In this case it would have been ok to narrow the mul.w to muls since there
283/// are indirect RAW dependency between the muls and the mul.w
284bool
285Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
286 // Disable the check for -Oz (aka OptimizeForSizeHarder).
287 if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
288 return false;
289
290 if (!CPSRDef)
291 // If this BB loops back to itself, conservatively avoid narrowing the
292 // first instruction that does partial flag update.
293 return HighLatencyCPSR || FirstInSelfLoop;
294
296 for (const MachineOperand &MO : CPSRDef->operands()) {
297 if (!MO.isReg() || MO.isUndef() || MO.isUse())
298 continue;
299 Register Reg = MO.getReg();
300 if (Reg == 0 || Reg == ARM::CPSR)
301 continue;
302 Defs.insert(Reg);
303 }
304
305 for (const MachineOperand &MO : Use->operands()) {
306 if (!MO.isReg() || MO.isUndef() || MO.isDef())
307 continue;
308 Register Reg = MO.getReg();
309 if (Defs.count(Reg))
310 return false;
311 }
312
313 // If the current CPSR has high latency, try to avoid the false dependency.
314 if (HighLatencyCPSR)
315 return true;
316
317 // tMOVi8 usually doesn't start long dependency chains, and there are a lot
318 // of them, so always shrink them when CPSR doesn't have high latency.
319 if (Use->getOpcode() == ARM::t2MOVi ||
320 Use->getOpcode() == ARM::t2MOVi16)
321 return false;
322
323 // No read-after-write dependency. The narrowing will add false dependency.
324 return true;
325}
326
327bool
328Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
329 bool is2Addr, ARMCC::CondCodes Pred,
330 bool LiveCPSR, bool &HasCC, bool &CCDead) {
331 if ((is2Addr && Entry.PredCC2 == 0) ||
332 (!is2Addr && Entry.PredCC1 == 0)) {
333 if (Pred == ARMCC::AL) {
334 // Not predicated, must set CPSR.
335 if (!HasCC) {
336 // Original instruction was not setting CPSR, but CPSR is not
337 // currently live anyway. It's ok to set it. The CPSR def is
338 // dead though.
339 if (!LiveCPSR) {
340 HasCC = true;
341 CCDead = true;
342 return true;
343 }
344 return false;
345 }
346 } else {
347 // Predicated, must not set CPSR.
348 if (HasCC)
349 return false;
350 }
351 } else if ((is2Addr && Entry.PredCC2 == 2) ||
352 (!is2Addr && Entry.PredCC1 == 2)) {
353 /// Old opcode has an optional def of CPSR.
354 if (HasCC)
355 return true;
356 // If old opcode does not implicitly define CPSR, then it's not ok since
357 // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
358 if (!HasImplicitCPSRDef(MI->getDesc()))
359 return false;
360 HasCC = true;
361 } else {
362 // 16-bit instruction does not set CPSR.
363 if (HasCC)
364 return false;
365 }
366
367 return true;
368}
369
371 unsigned Opc = MI->getOpcode();
372 bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
373 bool isLROk = (Opc == ARM::t2STMDB_UPD);
374 bool isSPOk = isPCOk || isLROk;
375 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
376 const MachineOperand &MO = MI->getOperand(i);
377 if (!MO.isReg() || MO.isImplicit())
378 continue;
379 Register Reg = MO.getReg();
380 if (Reg == 0 || Reg == ARM::CPSR)
381 continue;
382 if (isPCOk && Reg == ARM::PC)
383 continue;
384 if (isLROk && Reg == ARM::LR)
385 continue;
386 if (Reg == ARM::SP) {
387 if (isSPOk)
388 continue;
389 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
390 // Special case for these ldr / str with sp as base register.
391 continue;
392 }
393 if (!isARMLowRegister(Reg))
394 return false;
395 }
396 return true;
397}
398
399bool
400Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
401 const ReduceEntry &Entry) {
402 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
403 return false;
404
405 unsigned Scale = 1;
406 bool HasImmOffset = false;
407 bool HasShift = false;
408 bool HasOffReg = true;
409 bool isLdStMul = false;
410 unsigned Opc = Entry.NarrowOpc1;
411 unsigned OpNum = 3; // First 'rest' of operands.
412 uint8_t ImmLimit = Entry.Imm1Limit;
413
414 switch (Entry.WideOpc) {
415 default:
416 llvm_unreachable("Unexpected Thumb2 load / store opcode!");
417 case ARM::t2LDRi12:
418 case ARM::t2STRi12:
419 if (MI->getOperand(1).getReg() == ARM::SP) {
420 Opc = Entry.NarrowOpc2;
421 ImmLimit = Entry.Imm2Limit;
422 }
423
424 Scale = 4;
425 HasImmOffset = true;
426 HasOffReg = false;
427 break;
428 case ARM::t2LDRBi12:
429 case ARM::t2STRBi12:
430 HasImmOffset = true;
431 HasOffReg = false;
432 break;
433 case ARM::t2LDRHi12:
434 case ARM::t2STRHi12:
435 Scale = 2;
436 HasImmOffset = true;
437 HasOffReg = false;
438 break;
439 case ARM::t2LDRs:
440 case ARM::t2LDRBs:
441 case ARM::t2LDRHs:
442 case ARM::t2LDRSBs:
443 case ARM::t2LDRSHs:
444 case ARM::t2STRs:
445 case ARM::t2STRBs:
446 case ARM::t2STRHs:
447 HasShift = true;
448 OpNum = 4;
449 break;
450 case ARM::t2LDR_POST:
451 case ARM::t2STR_POST: {
452 if (!MinimizeSize)
453 return false;
454
455 if (!MI->hasOneMemOperand() ||
456 (*MI->memoperands_begin())->getAlign() < Align(4))
457 return false;
458
459 // We're creating a completely different type of load/store - LDM from LDR.
460 // For this reason we can't reuse the logic at the end of this function; we
461 // have to implement the MI building here.
462 bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
463 Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
464 Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
465 unsigned Offset = MI->getOperand(3).getImm();
466 unsigned PredImm = MI->getOperand(4).getImm();
467 Register PredReg = MI->getOperand(5).getReg();
470
471 if (Offset != 4)
472 return false;
473
474 // Add the 16-bit load / store instruction.
475 DebugLoc dl = MI->getDebugLoc();
476 auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
478 .addReg(Rn)
479 .addImm(PredImm)
480 .addReg(PredReg)
481 .addReg(Rt, IsStore ? 0 : RegState::Define);
482
483 // Transfer memoperands.
484 MIB.setMemRefs(MI->memoperands());
485
486 // Transfer MI flags.
487 MIB.setMIFlags(MI->getFlags());
488
489 // Kill the old instruction.
490 MI->eraseFromBundle();
491 ++NumLdSts;
492 return true;
493 }
494 case ARM::t2LDMIA: {
495 Register BaseReg = MI->getOperand(0).getReg();
496 assert(isARMLowRegister(BaseReg));
497
498 // For the non-writeback version (this one), the base register must be
499 // one of the registers being loaded.
500 bool isOK = false;
501 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
502 if (MO.getReg() == BaseReg) {
503 isOK = true;
504 break;
505 }
506 }
507
508 if (!isOK)
509 return false;
510
511 OpNum = 0;
512 isLdStMul = true;
513 break;
514 }
515 case ARM::t2STMIA: {
516 // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
517 // if the base register is killed, as then it doesn't matter what its value
518 // is after the instruction.
519 if (!MI->getOperand(0).isKill())
520 return false;
521
522 // If the base register is in the register list and isn't the lowest
523 // numbered register (i.e. it's in operand 4 onwards) then with writeback
524 // the stored value is unknown, so we can't convert to tSTMIA_UPD.
525 Register BaseReg = MI->getOperand(0).getReg();
526 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
527 if (MO.getReg() == BaseReg)
528 return false;
529
530 break;
531 }
532 case ARM::t2LDMIA_RET: {
533 Register BaseReg = MI->getOperand(1).getReg();
534 if (BaseReg != ARM::SP)
535 return false;
536 Opc = Entry.NarrowOpc2; // tPOP_RET
537 OpNum = 2;
538 isLdStMul = true;
539 break;
540 }
541 case ARM::t2LDMIA_UPD:
542 case ARM::t2STMIA_UPD:
543 case ARM::t2STMDB_UPD: {
544 OpNum = 0;
545
546 Register BaseReg = MI->getOperand(1).getReg();
547 if (BaseReg == ARM::SP &&
548 (Entry.WideOpc == ARM::t2LDMIA_UPD ||
549 Entry.WideOpc == ARM::t2STMDB_UPD)) {
550 Opc = Entry.NarrowOpc2; // tPOP or tPUSH
551 OpNum = 2;
552 } else if (!isARMLowRegister(BaseReg) ||
553 (Entry.WideOpc != ARM::t2LDMIA_UPD &&
554 Entry.WideOpc != ARM::t2STMIA_UPD)) {
555 return false;
556 }
557
558 isLdStMul = true;
559 break;
560 }
561 }
562
563 unsigned OffsetReg = 0;
564 bool OffsetKill = false;
565 bool OffsetInternal = false;
566 if (HasShift) {
567 OffsetReg = MI->getOperand(2).getReg();
568 OffsetKill = MI->getOperand(2).isKill();
569 OffsetInternal = MI->getOperand(2).isInternalRead();
570
571 if (MI->getOperand(3).getImm())
572 // Thumb1 addressing mode doesn't support shift.
573 return false;
574 }
575
576 unsigned OffsetImm = 0;
577 if (HasImmOffset) {
578 OffsetImm = MI->getOperand(2).getImm();
579 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
580
581 if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
582 // Make sure the immediate field fits.
583 return false;
584 }
585
586 // Add the 16-bit load / store instruction.
587 DebugLoc dl = MI->getDebugLoc();
588 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
589
590 // tSTMIA_UPD takes a defining register operand. We've already checked that
591 // the register is killed, so mark it as dead here.
592 if (Entry.WideOpc == ARM::t2STMIA)
593 MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
594
595 if (!isLdStMul) {
596 MIB.add(MI->getOperand(0));
597 MIB.add(MI->getOperand(1));
598
599 if (HasImmOffset)
600 MIB.addImm(OffsetImm / Scale);
601
602 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
603
604 if (HasOffReg)
605 MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
606 getInternalReadRegState(OffsetInternal));
607 }
608
609 // Transfer the rest of operands.
610 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
611 MIB.add(MO);
612
613 // Transfer memoperands.
614 MIB.setMemRefs(MI->memoperands());
615
616 // Transfer MI flags.
617 MIB.setMIFlags(MI->getFlags());
618
619 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
620 << " to 16-bit: " << *MIB);
621
623 ++NumLdSts;
624 return true;
625}
626
627bool
628Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
629 const ReduceEntry &Entry,
630 bool LiveCPSR, bool IsSelfLoop) {
631 unsigned Opc = MI->getOpcode();
632 if (Opc == ARM::t2ADDri) {
633 // If the source register is SP, try to reduce to tADDrSPi, otherwise
634 // it's a normal reduce.
635 if (MI->getOperand(1).getReg() != ARM::SP) {
636 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
637 return true;
638 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
639 }
640 // Try to reduce to tADDrSPi.
641 unsigned Imm = MI->getOperand(2).getImm();
642 // The immediate must be in range, the destination register must be a low
643 // reg, the predicate must be "always" and the condition flags must not
644 // be being set.
645 if (Imm & 3 || Imm > 1020)
646 return false;
647 if (!isARMLowRegister(MI->getOperand(0).getReg()))
648 return false;
649 if (MI->getOperand(3).getImm() != ARMCC::AL)
650 return false;
651 const MCInstrDesc &MCID = MI->getDesc();
652 if (MCID.hasOptionalDef() &&
653 MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
654 return false;
655
657 BuildMI(MBB, MI, MI->getDebugLoc(),
658 TII->get(ARM::tADDrSPi))
659 .add(MI->getOperand(0))
660 .add(MI->getOperand(1))
661 .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
663
664 // Transfer MI flags.
665 MIB.setMIFlags(MI->getFlags());
666
667 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
668 << " to 16-bit: " << *MIB);
669
671 ++NumNarrows;
672 return true;
673 }
674
675 if (Entry.LowRegs1 && !VerifyLowRegs(MI))
676 return false;
677
678 if (MI->mayLoadOrStore())
679 return ReduceLoadStore(MBB, MI, Entry);
680
681 switch (Opc) {
682 default: break;
683 case ARM::t2ADDSri:
684 case ARM::t2ADDSrr: {
685 Register PredReg;
686 if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
687 switch (Opc) {
688 default: break;
689 case ARM::t2ADDSri:
690 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
691 return true;
692 [[fallthrough]];
693 case ARM::t2ADDSrr:
694 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
695 }
696 }
697 break;
698 }
699 case ARM::t2RSBri:
700 case ARM::t2RSBSri:
701 case ARM::t2SXTB:
702 case ARM::t2SXTH:
703 case ARM::t2UXTB:
704 case ARM::t2UXTH:
705 if (MI->getOperand(2).getImm() == 0)
706 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
707 break;
708 case ARM::t2MOVi16:
709 // Can convert only 'pure' immediate operands, not immediates obtained as
710 // globals' addresses.
711 if (MI->getOperand(1).isImm())
712 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
713 break;
714 case ARM::t2CMPrr: {
715 // Try to reduce to the lo-reg only version first. Why there are two
716 // versions of the instruction is a mystery.
717 // It would be nice to just have two entries in the main table that
718 // are prioritized, but the table assumes a unique entry for each
719 // source insn opcode. So for now, we hack a local entry record to use.
720 static const ReduceEntry NarrowEntry =
721 { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
722 if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
723 return true;
724 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
725 }
726 case ARM::t2TEQrr: {
727 Register PredReg;
728 // Can only convert to eors if we're not in an IT block.
729 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
730 break;
731 // TODO if Operand 0 is not killed but Operand 1 is, then we could write
732 // to Op1 instead.
733 if (MI->getOperand(0).isKill())
734 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
735 }
736 }
737 return false;
738}
739
740bool
741Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
742 const ReduceEntry &Entry,
743 bool LiveCPSR, bool IsSelfLoop) {
744 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
745 return false;
746
747 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
748 // Don't issue movs with shifter operand for some CPUs unless we
749 // are optimizing for size.
750 return false;
751
752 Register Reg0 = MI->getOperand(0).getReg();
753 Register Reg1 = MI->getOperand(1).getReg();
754 // t2MUL is "special". The tied source operand is second, not first.
755 if (MI->getOpcode() == ARM::t2MUL) {
756 // MULS can be slower than MUL
757 if (!MinimizeSize && STI->avoidMULS())
758 return false;
759 Register Reg2 = MI->getOperand(2).getReg();
760 // Early exit if the regs aren't all low regs.
761 if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
762 || !isARMLowRegister(Reg2))
763 return false;
764 if (Reg0 != Reg2) {
765 // If the other operand also isn't the same as the destination, we
766 // can't reduce.
767 if (Reg1 != Reg0)
768 return false;
769 // Try to commute the operands to make it a 2-address instruction.
770 MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
771 if (!CommutedMI)
772 return false;
773 }
774 } else if (Reg0 != Reg1) {
775 // Try to commute the operands to make it a 2-address instruction.
776 unsigned CommOpIdx1 = 1;
777 unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
778 if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
779 MI->getOperand(CommOpIdx2).getReg() != Reg0)
780 return false;
781 MachineInstr *CommutedMI =
782 TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
783 if (!CommutedMI)
784 return false;
785 }
786 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
787 return false;
788 if (Entry.Imm2Limit) {
789 unsigned Imm = MI->getOperand(2).getImm();
790 unsigned Limit = (1 << Entry.Imm2Limit) - 1;
791 if (Imm > Limit)
792 return false;
793 } else {
794 Register Reg2 = MI->getOperand(2).getReg();
795 if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
796 return false;
797 }
798
799 // Check if it's possible / necessary to transfer the predicate.
800 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
801 Register PredReg;
802 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
803 bool SkipPred = false;
804 if (Pred != ARMCC::AL) {
805 if (!NewMCID.isPredicable())
806 // Can't transfer predicate, fail.
807 return false;
808 } else {
809 SkipPred = !NewMCID.isPredicable();
810 }
811
812 bool HasCC = false;
813 bool CCDead = false;
814 const MCInstrDesc &MCID = MI->getDesc();
815 if (MCID.hasOptionalDef()) {
816 unsigned NumOps = MCID.getNumOperands();
817 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
818 if (HasCC && MI->getOperand(NumOps-1).isDead())
819 CCDead = true;
820 }
821 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
822 return false;
823
824 // Avoid adding a false dependency on partial flag update by some 16-bit
825 // instructions which has the 's' bit set.
826 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
827 canAddPseudoFlagDep(MI, IsSelfLoop))
828 return false;
829
830 // Add the 16-bit instruction.
831 DebugLoc dl = MI->getDebugLoc();
832 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
833 MIB.add(MI->getOperand(0));
834 if (NewMCID.hasOptionalDef())
835 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
836
837 // Transfer the rest of operands.
838 unsigned NumOps = MCID.getNumOperands();
839 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
840 if (i < NumOps && MCID.operands()[i].isOptionalDef())
841 continue;
842 if (SkipPred && MCID.operands()[i].isPredicate())
843 continue;
844 MIB.add(MI->getOperand(i));
845 }
846
847 // Transfer MI flags.
848 MIB.setMIFlags(MI->getFlags());
849
850 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
851 << " to 16-bit: " << *MIB);
852
854 ++Num2Addrs;
855 return true;
856}
857
858bool
859Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
860 const ReduceEntry &Entry,
861 bool LiveCPSR, bool IsSelfLoop) {
862 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
863 return false;
864
865 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
866 // Don't issue movs with shifter operand for some CPUs unless we
867 // are optimizing for size.
868 return false;
869
870 unsigned Limit = ~0U;
871 if (Entry.Imm1Limit)
872 Limit = (1 << Entry.Imm1Limit) - 1;
873
874 const MCInstrDesc &MCID = MI->getDesc();
875 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
876 if (MCID.operands()[i].isPredicate())
877 continue;
878 const MachineOperand &MO = MI->getOperand(i);
879 if (MO.isReg()) {
880 Register Reg = MO.getReg();
881 if (!Reg || Reg == ARM::CPSR)
882 continue;
883 if (Entry.LowRegs1 && !isARMLowRegister(Reg))
884 return false;
885 } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
886 if (((unsigned)MO.getImm()) > Limit)
887 return false;
888 }
889 }
890
891 // Check if it's possible / necessary to transfer the predicate.
892 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
893 Register PredReg;
894 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
895 bool SkipPred = false;
896 if (Pred != ARMCC::AL) {
897 if (!NewMCID.isPredicable())
898 // Can't transfer predicate, fail.
899 return false;
900 } else {
901 SkipPred = !NewMCID.isPredicable();
902 }
903
904 bool HasCC = false;
905 bool CCDead = false;
906 if (MCID.hasOptionalDef()) {
907 unsigned NumOps = MCID.getNumOperands();
908 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
909 if (HasCC && MI->getOperand(NumOps-1).isDead())
910 CCDead = true;
911 }
912 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
913 return false;
914
915 // Avoid adding a false dependency on partial flag update by some 16-bit
916 // instructions which has the 's' bit set.
917 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
918 canAddPseudoFlagDep(MI, IsSelfLoop))
919 return false;
920
921 // Add the 16-bit instruction.
922 DebugLoc dl = MI->getDebugLoc();
923 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
924
925 // TEQ is special in that it doesn't define a register but we're converting
926 // it into an EOR which does. So add the first operand as a def and then
927 // again as a use.
928 if (MCID.getOpcode() == ARM::t2TEQrr) {
929 MIB.add(MI->getOperand(0));
930 MIB->getOperand(0).setIsKill(false);
931 MIB->getOperand(0).setIsDef(true);
932 MIB->getOperand(0).setIsDead(true);
933
934 if (NewMCID.hasOptionalDef())
935 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
936 MIB.add(MI->getOperand(0));
937 } else {
938 MIB.add(MI->getOperand(0));
939 if (NewMCID.hasOptionalDef())
940 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
941 }
942
943 // Transfer the rest of operands.
944 unsigned NumOps = MCID.getNumOperands();
945 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
946 if (i < NumOps && MCID.operands()[i].isOptionalDef())
947 continue;
948 if ((MCID.getOpcode() == ARM::t2RSBSri ||
949 MCID.getOpcode() == ARM::t2RSBri ||
950 MCID.getOpcode() == ARM::t2SXTB ||
951 MCID.getOpcode() == ARM::t2SXTH ||
952 MCID.getOpcode() == ARM::t2UXTB ||
953 MCID.getOpcode() == ARM::t2UXTH) && i == 2)
954 // Skip the zero immediate operand, it's now implicit.
955 continue;
956 bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
957 if (SkipPred && isPred)
958 continue;
959 const MachineOperand &MO = MI->getOperand(i);
960 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
961 // Skip implicit def of CPSR. Either it's modeled as an optional
962 // def now or it's already an implicit def on the new instruction.
963 continue;
964 MIB.add(MO);
965 }
966 if (!MCID.isPredicable() && NewMCID.isPredicable())
967 MIB.add(predOps(ARMCC::AL));
968
969 // Transfer MI flags.
970 MIB.setMIFlags(MI->getFlags());
971
972 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
973 << " to 16-bit: " << *MIB);
974
976 ++NumNarrows;
977 return true;
978}
979
980static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
981 bool HasDef = false;
982 for (const MachineOperand &MO : MI.operands()) {
983 if (!MO.isReg() || MO.isUndef() || MO.isUse())
984 continue;
985 if (MO.getReg() != ARM::CPSR)
986 continue;
987
988 DefCPSR = true;
989 if (!MO.isDead())
990 HasDef = true;
991 }
992
993 return HasDef || LiveCPSR;
994}
995
996static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
997 for (const MachineOperand &MO : MI.operands()) {
998 if (!MO.isReg() || MO.isUndef() || MO.isDef())
999 continue;
1000 if (MO.getReg() != ARM::CPSR)
1001 continue;
1002 assert(LiveCPSR && "CPSR liveness tracking is wrong!");
1003 if (MO.isKill()) {
1004 LiveCPSR = false;
1005 break;
1006 }
1007 }
1008
1009 return LiveCPSR;
1010}
1011
1012bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
1013 bool LiveCPSR, bool IsSelfLoop,
1014 bool SkipPrologueEpilogue) {
1015 unsigned Opcode = MI->getOpcode();
1016 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
1017 if (OPI == ReduceOpcodeMap.end())
1018 return false;
1019 if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
1020 MI->getFlag(MachineInstr::FrameDestroy)))
1021 return false;
1022 const ReduceEntry &Entry = ReduceTable[OPI->second];
1023
1024 // Don't attempt normal reductions on "special" cases for now.
1025 if (Entry.Special)
1026 return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
1027
1028 // Try to transform to a 16-bit two-address instruction.
1029 if (Entry.NarrowOpc2 &&
1030 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1031 return true;
1032
1033 // Try to transform to a 16-bit non-two-address instruction.
1034 if (Entry.NarrowOpc1 &&
1035 ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1036 return true;
1037
1038 return false;
1039}
1040
1041bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
1042 bool SkipPrologueEpilogue) {
1043 bool Modified = false;
1044
1045 // Yes, CPSR could be livein.
1046 bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
1047 MachineInstr *BundleMI = nullptr;
1048
1049 CPSRDef = nullptr;
1050 HighLatencyCPSR = false;
1051
1052 // Check predecessors for the latest CPSRDef.
1053 for (auto *Pred : MBB.predecessors()) {
1054 const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
1055 if (!PInfo.Visited) {
1056 // Since blocks are visited in RPO, this must be a back-edge.
1057 continue;
1058 }
1059 if (PInfo.HighLatencyCPSR) {
1060 HighLatencyCPSR = true;
1061 break;
1062 }
1063 }
1064
1065 // If this BB loops back to itself, conservatively avoid narrowing the
1066 // first instruction that does partial flag update.
1067 bool IsSelfLoop = MBB.isSuccessor(&MBB);
1070 for (; MII != E; MII = NextMII) {
1071 NextMII = std::next(MII);
1072
1073 MachineInstr *MI = &*MII;
1074 if (MI->isBundle()) {
1075 BundleMI = MI;
1076 continue;
1077 }
1078 if (MI->isDebugInstr())
1079 continue;
1080
1081 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
1082
1083 // Does NextMII belong to the same bundle as MI?
1084 bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
1085
1086 if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
1087 Modified = true;
1088 MachineBasicBlock::instr_iterator I = std::prev(NextMII);
1089 MI = &*I;
1090 // Removing and reinserting the first instruction in a bundle will break
1091 // up the bundle. Fix the bundling if it was broken.
1092 if (NextInSameBundle && !NextMII->isBundledWithPred())
1093 NextMII->bundleWithPred();
1094 }
1095
1096 if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
1097 // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
1098 // marker is only on the BUNDLE instruction. Process the BUNDLE
1099 // instruction as we finish with the bundled instruction to work around
1100 // the inconsistency.
1101 if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
1102 LiveCPSR = false;
1103 MachineOperand *MO =
1104 BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
1105 if (MO && !MO->isDead())
1106 LiveCPSR = true;
1107 MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
1108 if (MO && !MO->isKill())
1109 LiveCPSR = true;
1110 }
1111
1112 bool DefCPSR = false;
1113 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
1114 if (MI->isCall()) {
1115 // Calls don't really set CPSR.
1116 CPSRDef = nullptr;
1117 HighLatencyCPSR = false;
1118 IsSelfLoop = false;
1119 } else if (DefCPSR) {
1120 // This is the last CPSR defining instruction.
1121 CPSRDef = MI;
1122 HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
1123 IsSelfLoop = false;
1124 }
1125 }
1126
1127 MBBInfo &Info = BlockInfo[MBB.getNumber()];
1128 Info.HighLatencyCPSR = HighLatencyCPSR;
1129 Info.Visited = true;
1130 return Modified;
1131}
1132
1133bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
1134 if (PredicateFtor && !PredicateFtor(MF.getFunction()))
1135 return false;
1136
1137 STI = &MF.getSubtarget<ARMSubtarget>();
1138 if (STI->isThumb1Only() || STI->prefers32BitThumb())
1139 return false;
1140
1141 TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
1142
1143 // Optimizing / minimizing size? Minimizing size implies optimizing for size.
1144 OptimizeSize = MF.getFunction().hasOptSize();
1145 MinimizeSize = STI->hasMinSize();
1146
1147 BlockInfo.clear();
1148 BlockInfo.resize(MF.getNumBlockIDs());
1149
1150 // Visit blocks in reverse post-order so LastCPSRDef is known for all
1151 // predecessors.
1153 bool Modified = false;
1154 bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1156 for (MachineBasicBlock *MBB : RPOT)
1157 Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
1158 return Modified;
1159}
1160
1161/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1162/// reduction pass.
1164 std::function<bool(const Function &)> Ftor) {
1165 return new Thumb2SizeReduce(std::move(Ftor));
1166}
aarch64 promote const
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Performs the initial survey of the specified function
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static cl::opt< int > ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimit2Addr("t2-reduce-limit2", cl::init(-1), cl::Hidden)
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID)
static bool isHighLatencyCPSR(MachineInstr *Def)
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR)
static bool VerifyLowRegs(MachineInstr *MI)
#define THUMB2_SIZE_REDUCE_NAME
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR)
#define DEBUG_TYPE
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isThumb1Only() const
Definition: ARMSubtarget.h:403
bool hasMinSize() const
Definition: ARMSubtarget.h:402
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:682
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool isPredicable() const
Return true if this instruction has a predicate operand that controls execution.
Definition: MCInstrDesc.h:338
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
instr_iterator instr_begin()
instr_iterator erase_instr(MachineInstr *I)
Remove an instruction from the instruction list and delete it.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Instructions::iterator instr_iterator
instr_iterator instr_end()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
static const unsigned CommuteAnyOperandIndex
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:844
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Dead
Unused definition.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
unsigned getInternalReadRegState(bool B)
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
FunctionPass * createThumb2SizeReductionPass(std::function< bool(const Function &)> Ftor=nullptr)
createThumb2SizeReductionPass - Returns an instance of the Thumb2 size reduction pass.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39