Line data Source code
1 : //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file contains the Base ARM implementation of the TargetInstrInfo class.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "ARMBaseInstrInfo.h"
15 : #include "ARMBaseRegisterInfo.h"
16 : #include "ARMConstantPoolValue.h"
17 : #include "ARMFeatures.h"
18 : #include "ARMHazardRecognizer.h"
19 : #include "ARMMachineFunctionInfo.h"
20 : #include "ARMSubtarget.h"
21 : #include "MCTargetDesc/ARMAddressingModes.h"
22 : #include "MCTargetDesc/ARMBaseInfo.h"
23 : #include "llvm/ADT/DenseMap.h"
24 : #include "llvm/ADT/STLExtras.h"
25 : #include "llvm/ADT/SmallSet.h"
26 : #include "llvm/ADT/SmallVector.h"
27 : #include "llvm/ADT/Triple.h"
28 : #include "llvm/CodeGen/LiveVariables.h"
29 : #include "llvm/CodeGen/MachineBasicBlock.h"
30 : #include "llvm/CodeGen/MachineConstantPool.h"
31 : #include "llvm/CodeGen/MachineFrameInfo.h"
32 : #include "llvm/CodeGen/MachineFunction.h"
33 : #include "llvm/CodeGen/MachineInstr.h"
34 : #include "llvm/CodeGen/MachineInstrBuilder.h"
35 : #include "llvm/CodeGen/MachineMemOperand.h"
36 : #include "llvm/CodeGen/MachineOperand.h"
37 : #include "llvm/CodeGen/MachineRegisterInfo.h"
38 : #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
39 : #include "llvm/CodeGen/SelectionDAGNodes.h"
40 : #include "llvm/CodeGen/TargetInstrInfo.h"
41 : #include "llvm/CodeGen/TargetRegisterInfo.h"
42 : #include "llvm/CodeGen/TargetSchedule.h"
43 : #include "llvm/IR/Attributes.h"
44 : #include "llvm/IR/Constants.h"
45 : #include "llvm/IR/DebugLoc.h"
46 : #include "llvm/IR/Function.h"
47 : #include "llvm/IR/GlobalValue.h"
48 : #include "llvm/MC/MCAsmInfo.h"
49 : #include "llvm/MC/MCInstrDesc.h"
50 : #include "llvm/MC/MCInstrItineraries.h"
51 : #include "llvm/Support/BranchProbability.h"
52 : #include "llvm/Support/Casting.h"
53 : #include "llvm/Support/CommandLine.h"
54 : #include "llvm/Support/Compiler.h"
55 : #include "llvm/Support/Debug.h"
56 : #include "llvm/Support/ErrorHandling.h"
57 : #include "llvm/Support/raw_ostream.h"
58 : #include "llvm/Target/TargetMachine.h"
59 : #include <algorithm>
60 : #include <cassert>
61 : #include <cstdint>
62 : #include <iterator>
63 : #include <new>
64 : #include <utility>
65 : #include <vector>
66 :
67 : using namespace llvm;
68 :
69 : #define DEBUG_TYPE "arm-instrinfo"
70 :
71 : #define GET_INSTRINFO_CTOR_DTOR
72 : #include "ARMGenInstrInfo.inc"
73 :
74 : static cl::opt<bool>
75 : EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
76 : cl::desc("Enable ARM 2-addr to 3-addr conv"));
77 :
78 : /// ARM_MLxEntry - Record information about MLA / MLS instructions.
79 : struct ARM_MLxEntry {
80 : uint16_t MLxOpc; // MLA / MLS opcode
81 : uint16_t MulOpc; // Expanded multiplication opcode
82 : uint16_t AddSubOpc; // Expanded add / sub opcode
83 : bool NegAcc; // True if the acc is negated before the add / sub.
84 : bool HasLane; // True if instruction has an extra "lane" operand.
85 : };
86 :
87 : static const ARM_MLxEntry ARM_MLxTable[] = {
88 : // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89 : // fp scalar ops
90 : { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91 : { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92 : { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93 : { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94 : { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95 : { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96 : { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97 : { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98 :
99 : // fp SIMD ops
100 : { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101 : { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102 : { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103 : { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104 : { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105 : { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106 : { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107 : { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108 : };
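// An illustrative sketch (not part of the source): the first entry above
// says that a single-precision multiply-accumulate
//   VMLAS Sd, Sn, Sm        ; Sd = Sd + Sn * Sm
// may, when an MLx pipeline hazard must be avoided, be expanded into
//   VMULS St, Sn, Sm        ; St = Sn * Sm, with St a scratch register
//   VADDS Sd, Sd, St        ; Sd = Sd + St
// Entries with NegAcc negate the accumulator via the sub form, and entries
// with HasLane carry an extra lane-index operand on the multiply.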
109 :
110 5050 : ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
111 : : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112 5050 : Subtarget(STI) {
113 85850 : for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
114 80800 : if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115 0 : llvm_unreachable("Duplicated entries?");
116 80800 : MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117 80800 : MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118 : }
119 5050 : }
120 :
121 : // Use a ScoreboardHazardRecognizer for prepass ARM scheduling.
122 : // TargetInstrInfo currently defaults to no prepass hazard recognizer.
123 : ScheduleHazardRecognizer *
124 11322 : ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125 : const ScheduleDAG *DAG) const {
126 11322 : if (usePreRAHazardRecognizer()) {
127 : const InstrItineraryData *II =
128 11322 : static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129 11322 : return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130 : }
131 0 : return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132 : }
133 :
134 11263 : ScheduleHazardRecognizer *ARMBaseInstrInfo::
135 : CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
136 : const ScheduleDAG *DAG) const {
137 11263 : if (Subtarget.isThumb2() || Subtarget.hasVFP2())
138 19190 : return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
139 1668 : return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
140 : }
141 :
142 0 : MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
143 : MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
144 : // FIXME: Thumb2 support.
145 :
146 0 : if (!EnableARM3Addr)
147 : return nullptr;
148 :
149 0 : MachineFunction &MF = *MI.getParent()->getParent();
150 0 : uint64_t TSFlags = MI.getDesc().TSFlags;
151 : bool isPre = false;
152 0 : switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
153 : default: return nullptr;
154 0 : case ARMII::IndexModePre:
155 : isPre = true;
156 0 : break;
157 : case ARMII::IndexModePost:
158 : break;
159 : }
160 :
161 : // Try splitting an indexed load/store to an un-indexed one plus an add/sub
162 : // operation.
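// For example (an illustrative sketch, not from the source): a post-indexed
// load such as
//   ldr r0, [r1], #4
// can be split into an un-indexed load plus an explicit base update,
//   ldr r0, [r1]
//   add r1, r1, #4
// while a pre-indexed form performs the add first and then loads from the
// updated base register.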
163 0 : unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
164 0 : if (MemOpc == 0)
165 : return nullptr;
166 :
167 0 : MachineInstr *UpdateMI = nullptr;
168 0 : MachineInstr *MemMI = nullptr;
169 0 : unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
170 0 : const MCInstrDesc &MCID = MI.getDesc();
171 0 : unsigned NumOps = MCID.getNumOperands();
172 0 : bool isLoad = !MI.mayStore();
173 0 : const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
174 0 : const MachineOperand &Base = MI.getOperand(2);
175 0 : const MachineOperand &Offset = MI.getOperand(NumOps - 3);
176 0 : unsigned WBReg = WB.getReg();
177 0 : unsigned BaseReg = Base.getReg();
178 0 : unsigned OffReg = Offset.getReg();
179 0 : unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
180 0 : ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
181 0 : switch (AddrMode) {
182 0 : default: llvm_unreachable("Unknown indexed op!");
183 : case ARMII::AddrMode2: {
184 : bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
185 : unsigned Amt = ARM_AM::getAM2Offset(OffImm);
186 0 : if (OffReg == 0) {
187 0 : if (ARM_AM::getSOImmVal(Amt) == -1)
188 : // Can't encode it in a so_imm operand. This transformation will
189 : // add more than 1 instruction. Abandon!
190 : return nullptr;
191 0 : UpdateMI = BuildMI(MF, MI.getDebugLoc(),
192 0 : get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
193 0 : .addReg(BaseReg)
194 0 : .addImm(Amt)
195 0 : .add(predOps(Pred))
196 0 : .add(condCodeOp());
197 0 : } else if (Amt != 0) {
198 : ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
199 : unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
200 0 : UpdateMI = BuildMI(MF, MI.getDebugLoc(),
201 0 : get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
202 0 : .addReg(BaseReg)
203 0 : .addReg(OffReg)
204 0 : .addReg(0)
205 0 : .addImm(SOOpc)
206 0 : .add(predOps(Pred))
207 0 : .add(condCodeOp());
208 : } else
209 0 : UpdateMI = BuildMI(MF, MI.getDebugLoc(),
210 0 : get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
211 0 : .addReg(BaseReg)
212 0 : .addReg(OffReg)
213 0 : .add(predOps(Pred))
214 0 : .add(condCodeOp());
215 : break;
216 : }
217 : case ARMII::AddrMode3 : {
218 : bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
219 : unsigned Amt = ARM_AM::getAM3Offset(OffImm);
220 0 : if (OffReg == 0)
221 : // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
222 0 : UpdateMI = BuildMI(MF, MI.getDebugLoc(),
223 0 : get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
224 0 : .addReg(BaseReg)
225 : .addImm(Amt)
226 0 : .add(predOps(Pred))
227 0 : .add(condCodeOp());
228 : else
229 0 : UpdateMI = BuildMI(MF, MI.getDebugLoc(),
230 0 : get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
231 0 : .addReg(BaseReg)
232 0 : .addReg(OffReg)
233 0 : .add(predOps(Pred))
234 0 : .add(condCodeOp());
235 : break;
236 : }
237 : }
238 :
239 : std::vector<MachineInstr*> NewMIs;
240 0 : if (isPre) {
241 0 : if (isLoad)
242 0 : MemMI =
243 0 : BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
244 0 : .addReg(WBReg)
245 : .addImm(0)
246 0 : .addImm(Pred);
247 : else
248 0 : MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
249 0 : .addReg(MI.getOperand(1).getReg())
250 0 : .addReg(WBReg)
251 0 : .addReg(0)
252 : .addImm(0)
253 0 : .addImm(Pred);
254 0 : NewMIs.push_back(MemMI);
255 0 : NewMIs.push_back(UpdateMI);
256 : } else {
257 0 : if (isLoad)
258 0 : MemMI =
259 0 : BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
260 0 : .addReg(BaseReg)
261 : .addImm(0)
262 0 : .addImm(Pred);
263 : else
264 0 : MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
265 0 : .addReg(MI.getOperand(1).getReg())
266 0 : .addReg(BaseReg)
267 0 : .addReg(0)
268 : .addImm(0)
269 0 : .addImm(Pred);
270 0 : if (WB.isDead())
271 0 : UpdateMI->getOperand(0).setIsDead();
272 0 : NewMIs.push_back(UpdateMI);
273 0 : NewMIs.push_back(MemMI);
274 : }
275 :
276 : // Transfer LiveVariables states, kill / dead info.
277 0 : if (LV) {
278 0 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
279 0 : MachineOperand &MO = MI.getOperand(i);
280 0 : if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
281 : unsigned Reg = MO.getReg();
282 :
283 0 : LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284 0 : if (MO.isDef()) {
285 0 : MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
286 0 : if (MO.isDead())
287 0 : LV->addVirtualRegisterDead(Reg, *NewMI);
288 : }
289 0 : if (MO.isUse() && MO.isKill()) {
290 0 : for (unsigned j = 0; j < 2; ++j) {
291 : // Look at the two new MI's in reverse order.
292 0 : MachineInstr *NewMI = NewMIs[j];
293 0 : if (!NewMI->readsRegister(Reg))
294 0 : continue;
295 0 : LV->addVirtualRegisterKilled(Reg, *NewMI);
296 0 : if (VI.removeKill(MI))
297 0 : VI.Kills.push_back(NewMI);
298 0 : break;
299 : }
300 : }
301 : }
302 : }
303 : }
304 :
305 0 : MachineBasicBlock::iterator MBBI = MI.getIterator();
306 0 : MFI->insert(MBBI, NewMIs[1]);
307 0 : MFI->insert(MBBI, NewMIs[0]);
308 0 : return NewMIs[0];
309 : }
310 :
311 : // Branch analysis.
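// A sketch of the contract this hook implements (see TargetInstrInfo): for a
// block ending in
//   bne LBB0_2
//   b   LBB0_3
// it sets TBB = LBB0_2, FBB = LBB0_3, and Cond = {ARMCC::NE, CPSR}; a return
// value of true means the terminators could not be analyzed.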
312 298728 : bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
313 : MachineBasicBlock *&TBB,
314 : MachineBasicBlock *&FBB,
315 : SmallVectorImpl<MachineOperand> &Cond,
316 : bool AllowModify) const {
317 298728 : TBB = nullptr;
318 298728 : FBB = nullptr;
319 :
320 298728 : MachineBasicBlock::iterator I = MBB.end();
321 298728 : if (I == MBB.begin())
322 : return false; // Empty blocks are easy.
323 : --I;
324 :
325 : // Walk backwards from the end of the basic block until the branch is
326 : // analyzed or we give up.
327 1131533 : while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
328 : // Flag to be raised on unanalyzable instructions. This is useful in cases
329 : // where we want to clean up at the end of the basic block before we bail
330 : // out.
331 : bool CantAnalyze = false;
332 :
333 : // Skip over DEBUG values and predicated nonterminators.
334 267493 : while (I->isDebugInstr() || !I->isTerminator()) {
335 13853 : if (I == MBB.begin())
336 : return false;
337 : --I;
338 : }
339 :
340 507794 : if (isIndirectBranchOpcode(I->getOpcode()) ||
341 : isJumpTableBranchOpcode(I->getOpcode())) {
342 : // Indirect branches and jump tables can't be analyzed, but we still want
343 : // to clean up any instructions at the tail of the basic block.
344 : CantAnalyze = true;
345 : } else if (isUncondBranchOpcode(I->getOpcode())) {
346 32079 : TBB = I->getOperand(0).getMBB();
347 : } else if (isCondBranchOpcode(I->getOpcode())) {
348 : // Bail out if we encounter multiple conditional branches.
349 77707 : if (!Cond.empty())
350 : return true;
351 :
352 : assert(!FBB && "FBB should have been null.");
353 77661 : FBB = TBB;
354 77661 : TBB = I->getOperand(0).getMBB();
355 155322 : Cond.push_back(I->getOperand(1));
356 155322 : Cond.push_back(I->getOperand(2));
357 139813 : } else if (I->isReturn()) {
358 : // Returns can't be analyzed, but we should run cleanup.
359 268052 : CantAnalyze = !isPredicated(*I);
360 : } else {
361 : // We encountered an unrecognized terminator. Bail out immediately.
362 : return true;
363 : }
364 :
365 : // Cleanup code - to be run for unpredicated unconditional branches and
366 : // returns.
367 496128 : if (!isPredicated(*I) &&
368 337804 : (isUncondBranchOpcode(I->getOpcode()) ||
369 : isIndirectBranchOpcode(I->getOpcode()) ||
370 132566 : isJumpTableBranchOpcode(I->getOpcode()) ||
371 : I->isReturn())) {
372 : // Forget any previous conditional-branch information - it no longer applies.
373 : Cond.clear();
374 168902 : FBB = nullptr;
375 :
376 : // If we can modify the function, delete everything below this
377 : // unconditional branch.
378 168902 : if (AllowModify) {
379 66888 : MachineBasicBlock::iterator DI = std::next(I);
380 66894 : while (DI != MBB.end()) {
381 : MachineInstr &InstToDelete = *DI;
382 : ++DI;
383 6 : InstToDelete.eraseFromParent();
384 : }
385 : }
386 : }
387 :
388 248064 : if (CantAnalyze)
389 : return true;
390 :
391 111200 : if (I == MBB.begin())
392 : return false;
393 :
394 : --I;
395 : }
396 :
397 : // We made it past the terminators without bailing out - we must have
398 : // analyzed this branch successfully.
399 : return false;
400 : }
401 :
402 9708 : unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
403 : int *BytesRemoved) const {
404 : assert(!BytesRemoved && "code size not handled");
405 :
406 9708 : MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
407 9708 : if (I == MBB.end())
408 : return 0;
409 :
410 19412 : if (!isUncondBranchOpcode(I->getOpcode()) &&
411 : !isCondBranchOpcode(I->getOpcode()))
412 : return 0;
413 :
414 : // Remove the branch.
415 9302 : I->eraseFromParent();
416 :
417 9302 : I = MBB.end();
418 :
419 9302 : if (I == MBB.begin()) return 1;
420 : --I;
421 17768 : if (!isCondBranchOpcode(I->getOpcode()))
422 : return 1;
423 :
424 : // Remove the branch.
425 2289 : I->eraseFromParent();
426 2289 : return 2;
427 : }
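// Usage sketch (illustrative): on a block ending in "bne LBB0_2; b LBB0_3",
// removeBranch erases the unconditional branch first, then the conditional
// one, and returns 2; on a block with no branch terminators it returns 0.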
428 :
429 8885 : unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
430 : MachineBasicBlock *TBB,
431 : MachineBasicBlock *FBB,
432 : ArrayRef<MachineOperand> Cond,
433 : const DebugLoc &DL,
434 : int *BytesAdded) const {
435 : assert(!BytesAdded && "code size not handled");
436 8885 : ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
437 8885 : int BOpc = !AFI->isThumbFunction()
438 8885 : ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
439 : int BccOpc = !AFI->isThumbFunction()
440 8885 : ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
441 8885 : bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
442 :
443 : // Shouldn't be a fall through.
444 : assert(TBB && "insertBranch must not be told to insert a fallthrough");
445 : assert((Cond.size() == 2 || Cond.size() == 0) &&
446 : "ARM branch conditions have two components!");
447 :
448 : // For conditional branches, we use addOperand to preserve CPSR flags.
449 :
450 8885 : if (!FBB) {
451 8709 : if (Cond.empty()) { // Unconditional branch?
452 2073 : if (isThumb)
453 2784 : BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
454 : else
455 681 : BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
456 : } else
457 6636 : BuildMI(&MBB, DL, get(BccOpc))
458 : .addMBB(TBB)
459 6636 : .addImm(Cond[0].getImm())
460 : .add(Cond[1]);
461 8709 : return 1;
462 : }
463 :
464 : // Two-way conditional branch.
465 176 : BuildMI(&MBB, DL, get(BccOpc))
466 : .addMBB(TBB)
467 176 : .addImm(Cond[0].getImm())
468 : .add(Cond[1]);
469 176 : if (isThumb)
470 212 : BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
471 : else
472 70 : BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
473 : return 2;
474 : }
475 :
476 9160 : bool ARMBaseInstrInfo::
477 : reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
478 9160 : ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
479 9160 : Cond[0].setImm(ARMCC::getOppositeCondition(CC));
480 9160 : return false;
481 : }
482 :
483 978436 : bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
484 978436 : if (MI.isBundle()) {
485 899 : MachineBasicBlock::const_instr_iterator I = MI.getIterator();
486 899 : MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
487 1836 : while (++I != E && I->isInsideBundle()) {
488 1798 : int PIdx = I->findFirstPredOperandIdx();
489 1798 : if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
490 : return true;
491 : }
492 : return false;
493 : }
494 :
495 977537 : int PIdx = MI.findFirstPredOperandIdx();
496 977537 : return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
497 : }
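// Illustrative example (not from the source): a predicated add such as
//   ADDne r0, r1, r2
// carries trailing predicate operands {ARMCC::NE, CPSR}, so isPredicated
// returns true; an unpredicated instruction carries {ARMCC::AL, noreg} and
// returns false.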
498 :
499 797 : bool ARMBaseInstrInfo::PredicateInstruction(
500 : MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
501 797 : unsigned Opc = MI.getOpcode();
502 797 : if (isUncondBranchOpcode(Opc)) {
503 0 : MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
504 0 : MachineInstrBuilder(*MI.getParent()->getParent(), MI)
505 0 : .addImm(Pred[0].getImm())
506 0 : .addReg(Pred[1].getReg());
507 0 : return true;
508 : }
509 :
510 797 : int PIdx = MI.findFirstPredOperandIdx();
511 797 : if (PIdx != -1) {
512 797 : MachineOperand &PMO = MI.getOperand(PIdx);
513 797 : PMO.setImm(Pred[0].getImm());
514 1594 : MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
515 797 : return true;
516 : }
517 : return false;
518 : }
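// Usage sketch (illustrative): predicating an unconditional "B LBB0_1" with
// Pred = {ARMCC::EQ, CPSR} switches it to the matching conditional opcode and
// appends the predicate operands, yielding Bcc with operands
// {LBB0_1, ARMCC::EQ, CPSR}; for other instructions only the existing
// predicate operands are rewritten in place.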
519 :
520 110 : bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
521 : ArrayRef<MachineOperand> Pred2) const {
522 110 : if (Pred1.size() > 2 || Pred2.size() > 2)
523 : return false;
524 :
525 110 : ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
526 110 : ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
527 110 : if (CC1 == CC2)
528 : return true;
529 :
530 48 : switch (CC1) {
531 : default:
532 : return false;
533 0 : case ARMCC::AL:
534 0 : return true;
535 5 : case ARMCC::HS:
536 5 : return CC2 == ARMCC::HI;
537 6 : case ARMCC::LS:
538 6 : return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
539 3 : case ARMCC::GE:
540 3 : return CC2 == ARMCC::GT;
541 2 : case ARMCC::LE:
542 2 : return CC2 == ARMCC::LT;
543 : }
544 : }
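// Worked example (illustrative): HS (unsigned higher or same) subsumes HI
// (unsigned higher), since any state satisfying HI also satisfies HS, and LS
// subsumes both LO and EQ; unrelated pairs such as GE vs. MI fall through to
// the default case and return false.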
545 :
546 41989 : bool ARMBaseInstrInfo::DefinesPredicate(
547 : MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
548 : bool Found = false;
549 237151 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
550 195162 : const MachineOperand &MO = MI.getOperand(i);
551 195162 : if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
552 133251 : (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
553 3273 : Pred.push_back(MO);
554 : Found = true;
555 : }
556 : }
557 :
558 41989 : return Found;
559 : }
560 :
561 60 : bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
562 381 : for (const auto &MO : MI.operands())
563 328 : if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
564 : return true;
565 : return false;
566 : }
567 :
568 0 : bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
569 : unsigned Op) const {
570 0 : const MachineOperand &Offset = MI.getOperand(Op + 1);
571 0 : return Offset.getReg() != 0;
572 : }
573 :
574 : // On Cortex-A57, a load with a negative register offset requires an
575 : // additional cycle and an extra I pipeline unit.
576 0 : bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
577 : unsigned Op) const {
578 0 : const MachineOperand &Offset = MI.getOperand(Op + 1);
579 0 : const MachineOperand &Opc = MI.getOperand(Op + 2);
580 : assert(Opc.isImm());
581 : assert(Offset.isReg());
582 0 : int64_t OpcImm = Opc.getImm();
583 :
584 0 : bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
585 0 : return (isSub && Offset.getReg() != 0);
586 : }
587 :
588 0 : bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
589 : unsigned Op) const {
590 0 : const MachineOperand &Opc = MI.getOperand(Op + 2);
591 0 : unsigned OffImm = Opc.getImm();
592 0 : return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
593 : }
594 :
595 : // Load, scaled register offset, not plus LSL2
596 3 : bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
597 : unsigned Op) const {
598 3 : const MachineOperand &Opc = MI.getOperand(Op + 2);
599 3 : unsigned OffImm = Opc.getImm();
600 :
601 3 : bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
602 : unsigned Amt = ARM_AM::getAM2Offset(OffImm);
603 : ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
604 3 : if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
605 3 : bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
606 3 : return !SimpleScaled;
607 : }
608 :
609 : // Minus reg for ldstso addr mode
610 3 : bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
611 : unsigned Op) const {
612 6 : unsigned OffImm = MI.getOperand(Op + 2).getImm();
613 3 : return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
614 : }
615 :
616 : // Load, scaled register offset
617 0 : bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
618 : unsigned Op) const {
619 0 : unsigned OffImm = MI.getOperand(Op + 2).getImm();
620 0 : return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
621 : }
622 :
623 37013 : static bool isEligibleForITBlock(const MachineInstr *MI) {
624 74026 : switch (MI->getOpcode()) {
625 : default: return true;
626 60 : case ARM::tADC: // ADC (register) T1
627 : case ARM::tADDi3: // ADD (immediate) T1
628 : case ARM::tADDi8: // ADD (immediate) T2
629 : case ARM::tADDrr: // ADD (register) T1
630 : case ARM::tAND: // AND (register) T1
631 : case ARM::tASRri: // ASR (immediate) T1
632 : case ARM::tASRrr: // ASR (register) T1
633 : case ARM::tBIC: // BIC (register) T1
634 : case ARM::tEOR: // EOR (register) T1
635 : case ARM::tLSLri: // LSL (immediate) T1
636 : case ARM::tLSLrr: // LSL (register) T1
637 : case ARM::tLSRri: // LSR (immediate) T1
638 : case ARM::tLSRrr: // LSR (register) T1
639 : case ARM::tMUL: // MUL T1
640 : case ARM::tMVN: // MVN (register) T1
641 : case ARM::tORR: // ORR (register) T1
642 : case ARM::tROR: // ROR (register) T1
643 : case ARM::tRSB: // RSB (immediate) T1
644 : case ARM::tSBC: // SBC (register) T1
645 : case ARM::tSUBi3: // SUB (immediate) T1
646 : case ARM::tSUBi8: // SUB (immediate) T2
647 : case ARM::tSUBrr: // SUB (register) T1
648 60 : return !ARMBaseInstrInfo::isCPSRDefined(*MI);
649 : }
650 : }
651 :
652 : /// isPredicable - Return true if the specified instruction can be predicated.
653 : /// By default, this returns true for every instruction with a
654 : /// PredicateOperand.
655 41972 : bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
656 41972 : if (!MI.isPredicable())
657 : return false;
658 :
659 37049 : if (MI.isBundle())
660 : return false;
661 :
662 37013 : if (!isEligibleForITBlock(&MI))
663 : return false;
664 :
665 : const ARMFunctionInfo *AFI =
666 37006 : MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
667 :
668 : // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
669 : // In their ARM encoding, they can't be encoded in a conditional form.
670 37006 : if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
671 : return false;
672 :
673 34698 : if (AFI->isThumb2Function()) {
674 14564 : if (getSubtarget().restrictIT())
675 827 : return isV8EligibleForIT(&MI);
676 : }
677 :
678 : return true;
679 : }
680 :
681 : namespace llvm {
682 :
683 94 : template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
684 613 : for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
685 519 : const MachineOperand &MO = MI->getOperand(i);
686 519 : if (!MO.isReg() || MO.isUndef() || MO.isUse())
687 : continue;
688 188 : if (MO.getReg() != ARM::CPSR)
689 : continue;
690 94 : if (!MO.isDead())
691 : return false;
692 : }
693 : // all definitions of CPSR are dead
694 : return true;
695 : }
696 :
697 : } // end namespace llvm
698 :
699 : /// getInstSizeInBytes - Return the size of the specified MachineInstr in bytes.
700 : ///
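/// For example (drawn from the switch below): fixed-width instructions simply
/// report MCID.getSize(); MOVi32imm reports 8 because it is materialized as a
/// MOVW/MOVT pair; CONSTPOOL_ENTRY and the JUMPTABLE_* pseudos record their
/// size in operand #2; other pseudo-instructions report 0.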
701 890858 : unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
702 890858 : const MachineBasicBlock &MBB = *MI.getParent();
703 890858 : const MachineFunction *MF = MBB.getParent();
704 890858 : const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
705 :
706 890858 : const MCInstrDesc &MCID = MI.getDesc();
707 1781716 : if (MCID.getSize())
708 : return MCID.getSize();
709 :
710 : // If this machine instr is an inline asm, measure it.
711 307218 : if (MI.getOpcode() == ARM::INLINEASM) {
712 172372 : unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
713 172372 : if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
714 530 : Size = alignTo(Size, 4);
715 172372 : return Size;
716 : }
717 : unsigned Opc = MI.getOpcode();
718 134846 : switch (Opc) {
719 : default:
720 : // pseudo-instruction sizes are zero.
721 : return 0;
722 0 : case TargetOpcode::BUNDLE:
723 0 : return getInstBundleLength(MI);
724 2038 : case ARM::MOVi16_ga_pcrel:
725 : case ARM::MOVTi16_ga_pcrel:
726 : case ARM::t2MOVi16_ga_pcrel:
727 : case ARM::t2MOVTi16_ga_pcrel:
728 2038 : return 4;
729 13 : case ARM::MOVi32imm:
730 : case ARM::t2MOVi32imm:
731 13 : return 8;
732 23100 : case ARM::CONSTPOOL_ENTRY:
733 : case ARM::JUMPTABLE_INSTS:
734 : case ARM::JUMPTABLE_ADDRS:
735 : case ARM::JUMPTABLE_TBB:
736 : case ARM::JUMPTABLE_TBH:
737 : // If this machine instr is a constant pool entry, its size is recorded as
738 : // operand #2.
739 23100 : return MI.getOperand(2).getImm();
740 4 : case ARM::Int_eh_sjlj_longjmp:
741 4 : return 16;
742 2 : case ARM::tInt_eh_sjlj_longjmp:
743 2 : return 10;
744 3 : case ARM::tInt_WIN_eh_sjlj_longjmp:
745 3 : return 12;
746 7 : case ARM::Int_eh_sjlj_setjmp:
747 : case ARM::Int_eh_sjlj_setjmp_nofp:
748 7 : return 20;
749 13 : case ARM::tInt_eh_sjlj_setjmp:
750 : case ARM::t2Int_eh_sjlj_setjmp:
751 : case ARM::t2Int_eh_sjlj_setjmp_nofp:
752 13 : return 12;
753 41 : case ARM::SPACE:
754 41 : return MI.getOperand(1).getImm();
755 : }
756 : }
757 :
758 0 : unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
759 : unsigned Size = 0;
760 0 : MachineBasicBlock::const_instr_iterator I = MI.getIterator();
761 0 : MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
762 0 : while (++I != E && I->isInsideBundle()) {
763 : assert(!I->isBundle() && "No nested bundle!");
764 0 : Size += getInstSizeInBytes(*I);
765 : }
766 0 : return Size;
767 : }
768 :
769 4 : void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
770 : MachineBasicBlock::iterator I,
771 : unsigned DestReg, bool KillSrc,
772 : const ARMSubtarget &Subtarget) const {
773 4 : unsigned Opc = Subtarget.isThumb()
774 4 : ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
775 : : ARM::MRS;
776 :
777 : MachineInstrBuilder MIB =
778 8 : BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
779 :
780 : // There is only 1 A/R class MRS instruction, and it always refers to
781 : // APSR. However, there are lots of other possibilities on M-class cores.
782 4 : if (Subtarget.isMClass())
783 : MIB.addImm(0x800);
784 :
785 4 : MIB.add(predOps(ARMCC::AL))
786 4 : .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
787 4 : }
788 :
789 4 : void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
790 : MachineBasicBlock::iterator I,
791 : unsigned SrcReg, bool KillSrc,
792 : const ARMSubtarget &Subtarget) const {
793 4 : unsigned Opc = Subtarget.isThumb()
794 4 : ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
795 : : ARM::MSR;
796 :
797 8 : MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
798 :
799 4 : if (Subtarget.isMClass())
800 : MIB.addImm(0x800);
801 : else
802 : MIB.addImm(8);
803 :
804 4 : MIB.addReg(SrcReg, getKillRegState(KillSrc))
805 4 : .add(predOps(ARMCC::AL))
806 4 : .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
807 4 : }
808 :
809 8350 : void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
810 : MachineBasicBlock::iterator I,
811 : const DebugLoc &DL, unsigned DestReg,
812 : unsigned SrcReg, bool KillSrc) const {
813 8350 : bool GPRDest = ARM::GPRRegClass.contains(DestReg);
814 : bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
815 :
816 8350 : if (GPRDest && GPRSrc) {
817 9616 : BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
818 4808 : .addReg(SrcReg, getKillRegState(KillSrc))
819 4808 : .add(predOps(ARMCC::AL))
820 4808 : .add(condCodeOp());
821 8333 : return;
822 : }
823 :
824 3542 : bool SPRDest = ARM::SPRRegClass.contains(DestReg);
825 : bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
826 :
827 : unsigned Opc = 0;
828 3542 : if (SPRDest && SPRSrc)
829 : Opc = ARM::VMOVS;
830 2192 : else if (GPRDest && SPRSrc)
831 : Opc = ARM::VMOVRS;
832 1875 : else if (SPRDest && GPRSrc)
833 : Opc = ARM::VMOVSR;
834 3068 : else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
835 : Opc = ARM::VMOVD;
836 844 : else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
837 : Opc = ARM::VORRq;
838 :
839 : if (Opc) {
840 7034 : MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
841 3517 : MIB.addReg(SrcReg, getKillRegState(KillSrc));
842 3517 : if (Opc == ARM::VORRq)
843 397 : MIB.addReg(SrcReg, getKillRegState(KillSrc));
844 3517 : MIB.add(predOps(ARMCC::AL));
845 : return;
846 : }
847 :
848 : // Handle register classes that require multiple instructions.
849 : unsigned BeginIdx = 0;
850 : unsigned SubRegs = 0;
851 : int Spacing = 1;
852 :
853 : // Use VORRq when possible.
854 50 : if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
855 : Opc = ARM::VORRq;
856 : BeginIdx = ARM::qsub_0;
857 : SubRegs = 2;
858 50 : } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
859 : Opc = ARM::VORRq;
860 : BeginIdx = ARM::qsub_0;
861 : SubRegs = 4;
862 : // Fall back to VMOVD.
863 40 : } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
864 : Opc = ARM::VMOVD;
865 : BeginIdx = ARM::dsub_0;
866 : SubRegs = 2;
867 32 : } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
868 : Opc = ARM::VMOVD;
869 : BeginIdx = ARM::dsub_0;
870 : SubRegs = 3;
871 32 : } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
872 : Opc = ARM::VMOVD;
873 : BeginIdx = ARM::dsub_0;
874 : SubRegs = 4;
875 32 : } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
876 2 : Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
877 : BeginIdx = ARM::gsub_0;
878 : SubRegs = 2;
879 28 : } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
880 : Opc = ARM::VMOVD;
881 : BeginIdx = ARM::dsub_0;
882 : SubRegs = 2;
883 : Spacing = 2;
884 28 : } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
885 : Opc = ARM::VMOVD;
886 : BeginIdx = ARM::dsub_0;
887 : SubRegs = 3;
888 : Spacing = 2;
889 28 : } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
890 : Opc = ARM::VMOVD;
891 : BeginIdx = ARM::dsub_0;
892 : SubRegs = 4;
893 : Spacing = 2;
894 14 : } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
895 : Opc = ARM::VMOVS;
896 : BeginIdx = ARM::ssub_0;
897 : SubRegs = 2;
898 8 : } else if (SrcReg == ARM::CPSR) {
899 4 : copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
900 4 : return;
901 4 : } else if (DestReg == ARM::CPSR) {
902 4 : copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
903 4 : return;
904 : }
905 :
906 : assert(Opc && "Impossible reg-to-reg copy");
907 :
908 17 : const TargetRegisterInfo *TRI = &getRegisterInfo();
909 17 : MachineInstrBuilder Mov;
910 :
911 : // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
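// Illustrative example (not from the source): copying the D-register pair
// D0_D1 into the overlapping pair D1_D2 must emit D2 <- D1 before D1 <- D0;
// the forward order would overwrite D1 before it is read.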
912 17 : if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
913 0 : BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
914 0 : Spacing = -Spacing;
915 : }
916 : #ifndef NDEBUG
917 : SmallSet<unsigned, 4> DstRegs;
918 : #endif
919 61 : for (unsigned i = 0; i != SubRegs; ++i) {
920 44 : unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
921 44 : unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
922 : assert(Dst && Src && "Bad sub-register");
923 : #ifndef NDEBUG
924 : assert(!DstRegs.count(Src) && "destructive vector copy");
925 : DstRegs.insert(Dst);
926 : #endif
927 88 : Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
928 : // VORR takes two source operands.
929 44 : if (Opc == ARM::VORRq)
930 20 : Mov.addReg(Src);
931 44 : Mov = Mov.add(predOps(ARMCC::AL));
932 : // MOVr can set CC.
933 44 : if (Opc == ARM::MOVr)
934 2 : Mov = Mov.add(condCodeOp());
935 : }
936 : // Add implicit super-register defs and kills to the last instruction.
937 17 : Mov->addRegisterDefined(DestReg, TRI);
938 17 : if (KillSrc)
939 1 : Mov->addRegisterKilled(SrcReg, TRI);
940 : }
941 :
942 162 : bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
943 : const MachineOperand *&Src,
944 : const MachineOperand *&Dest) const {
945 : // VMOVRRD is also a copy instruction, but it requires
946 : // special handling: it is a more complex form of copy, so
947 : // we do not consider it here. To recognize such
948 : // instructions, the isExtractSubregLike MI interface
949 : // function could be used.
950 : // VORRq is considered a move only if its two inputs are
951 : // the same register.
952 162 : if (!MI.isMoveReg() ||
953 22 : (MI.getOpcode() == ARM::VORRq &&
954 0 : MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
955 : return false;
956 22 : Dest = &MI.getOperand(0);
957 22 : Src = &MI.getOperand(1);
958 22 : return true;
959 : }
960 :
961 : const MachineInstrBuilder &
962 70 : ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
963 : unsigned SubIdx, unsigned State,
964 : const TargetRegisterInfo *TRI) const {
965 70 : if (!SubIdx)
966 0 : return MIB.addReg(Reg, State);
967 :
968 70 : if (TargetRegisterInfo::isPhysicalRegister(Reg))
969 8 : return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
970 62 : return MIB.addReg(Reg, State, SubIdx);
971 : }
972 :
973 2357 : void ARMBaseInstrInfo::
974 : storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
975 : unsigned SrcReg, bool isKill, int FI,
976 : const TargetRegisterClass *RC,
977 : const TargetRegisterInfo *TRI) const {
978 2357 : MachineFunction &MF = *MBB.getParent();
979 2357 : MachineFrameInfo &MFI = MF.getFrameInfo();
980 : unsigned Align = MFI.getObjectAlignment(FI);
981 :
982 2357 : MachineMemOperand *MMO = MF.getMachineMemOperand(
983 : MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
984 : MFI.getObjectSize(FI), Align);
985 :
986 2357 : switch (TRI->getSpillSize(*RC)) {
987 2 : case 2:
988 4 : if (ARM::HPRRegClass.hasSubClassEq(RC)) {
989 4 : BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
990 2 : .addReg(SrcReg, getKillRegState(isKill))
991 : .addFrameIndex(FI)
992 : .addImm(0)
993 : .addMemOperand(MMO)
994 2 : .add(predOps(ARMCC::AL));
995 : } else
996 0 : llvm_unreachable("Unknown reg class!");
997 2 : break;
998 1698 : case 4:
999 3396 : if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1000 4626 : BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
1001 1542 : .addReg(SrcReg, getKillRegState(isKill))
1002 : .addFrameIndex(FI)
1003 : .addImm(0)
1004 : .addMemOperand(MMO)
1005 1542 : .add(predOps(ARMCC::AL));
1006 312 : } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1007 468 : BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
1008 156 : .addReg(SrcReg, getKillRegState(isKill))
1009 : .addFrameIndex(FI)
1010 : .addImm(0)
1011 : .addMemOperand(MMO)
1012 156 : .add(predOps(ARMCC::AL));
1013 : } else
1014 0 : llvm_unreachable("Unknown reg class!");
1015 : break;
1016 258 : case 8:
1017 516 : if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1018 753 : BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1019 251 : .addReg(SrcReg, getKillRegState(isKill))
1020 : .addFrameIndex(FI)
1021 : .addImm(0)
1022 : .addMemOperand(MMO)
1023 251 : .add(predOps(ARMCC::AL));
1024 14 : } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1025 7 : if (Subtarget.hasV5TEOps()) {
1026 15 : MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1027 5 : AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1028 5 : AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1029 5 : MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1030 5 : .add(predOps(ARMCC::AL));
1031 : } else {
1032 : // Fallback to STM instruction, which has existed since the dawn of
1033 : // time.
1034 6 : MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1035 : .addFrameIndex(FI)
1036 : .addMemOperand(MMO)
1037 2 : .add(predOps(ARMCC::AL));
1038 2 : AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1039 2 : AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1040 : }
1041 : } else
1042 0 : llvm_unreachable("Unknown reg class!");
1043 : break;
1044 396 : case 16:
1045 792 : if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1046 : // Use aligned spills if the stack can be realigned.
1047 396 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1048 1167 : BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1049 : .addFrameIndex(FI)
1050 : .addImm(16)
1051 389 : .addReg(SrcReg, getKillRegState(isKill))
1052 : .addMemOperand(MMO)
1053 389 : .add(predOps(ARMCC::AL));
1054 : } else {
1055 21 : BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1056 7 : .addReg(SrcReg, getKillRegState(isKill))
1057 : .addFrameIndex(FI)
1058 : .addMemOperand(MMO)
1059 7 : .add(predOps(ARMCC::AL));
1060 : }
1061 : } else
1062 0 : llvm_unreachable("Unknown reg class!");
1063 : break;
1064 1 : case 24:
1065 2 : if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1066 : // Use aligned spills if the stack can be realigned.
1067 1 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1068 0 : BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1069 : .addFrameIndex(FI)
1070 : .addImm(16)
1071 0 : .addReg(SrcReg, getKillRegState(isKill))
1072 : .addMemOperand(MMO)
1073 0 : .add(predOps(ARMCC::AL));
1074 : } else {
1075 1 : MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1076 2 : get(ARM::VSTMDIA))
1077 : .addFrameIndex(FI)
1078 1 : .add(predOps(ARMCC::AL))
1079 1 : .addMemOperand(MMO);
1080 1 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1081 1 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1082 1 : AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1083 : }
1084 : } else
1085 0 : llvm_unreachable("Unknown reg class!");
1086 : break;
1087 0 : case 32:
1088 0 : if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1089 0 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1090 : // FIXME: It's possible to only store part of the QQ register if the
1091 : // spilled def has a sub-register index.
1092 0 : BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1093 : .addFrameIndex(FI)
1094 : .addImm(16)
1095 0 : .addReg(SrcReg, getKillRegState(isKill))
1096 : .addMemOperand(MMO)
1097 0 : .add(predOps(ARMCC::AL));
1098 : } else {
1099 0 : MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
1100 0 : get(ARM::VSTMDIA))
1101 : .addFrameIndex(FI)
1102 0 : .add(predOps(ARMCC::AL))
1103 0 : .addMemOperand(MMO);
1104 0 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1105 0 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1106 0 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1107 0 : AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1108 : }
1109 : } else
1110 0 : llvm_unreachable("Unknown reg class!");
1111 : break;
1112 2 : case 64:
1113 4 : if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1114 6 : MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1115 : .addFrameIndex(FI)
1116 2 : .add(predOps(ARMCC::AL))
1117 2 : .addMemOperand(MMO);
1118 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1119 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1120 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1121 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1122 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1123 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1124 2 : MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1125 2 : AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1126 : } else
1127 0 : llvm_unreachable("Unknown reg class!");
1128 2 : break;
1129 0 : default:
1130 0 : llvm_unreachable("Unknown reg class!");
1131 : }
1132 2357 : }
1133 :
1134 13351 : unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1135 : int &FrameIndex) const {
1136 26702 : switch (MI.getOpcode()) {
1137 : default: break;
1138 23 : case ARM::STRrs:
1139 : case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1140 23 : if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1141 23 : MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1142 0 : MI.getOperand(3).getImm() == 0) {
1143 0 : FrameIndex = MI.getOperand(1).getIndex();
1144 0 : return MI.getOperand(0).getReg();
1145 : }
1146 : break;
1147 1117 : case ARM::STRi12:
1148 : case ARM::t2STRi12:
1149 : case ARM::tSTRspi:
1150 : case ARM::VSTRD:
1151 : case ARM::VSTRS:
1152 2234 : if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1153 370 : MI.getOperand(2).getImm() == 0) {
1154 332 : FrameIndex = MI.getOperand(1).getIndex();
1155 332 : return MI.getOperand(0).getReg();
1156 : }
1157 : break;
1158 12 : case ARM::VST1q64:
1159 : case ARM::VST1d64TPseudo:
1160 : case ARM::VST1d64QPseudo:
1161 24 : if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1162 0 : FrameIndex = MI.getOperand(0).getIndex();
1163 0 : return MI.getOperand(2).getReg();
1164 : }
1165 : break;
1166 0 : case ARM::VSTMQIA:
1167 0 : if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1168 0 : FrameIndex = MI.getOperand(1).getIndex();
1169 0 : return MI.getOperand(0).getReg();
1170 : }
1171 : break;
1172 : }
1173 :
1174 : return 0;
1175 : }
1176 :
1177 135897 : unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1178 : int &FrameIndex) const {
1179 : SmallVector<const MachineMemOperand *, 1> Accesses;
1180 135897 : if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
1181 2780 : FrameIndex =
1182 2780 : cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1183 2780 : ->getFrameIndex();
1184 2780 : return true;
1185 : }
1186 : return false;
1187 : }
1188 :
1189 2029 : void ARMBaseInstrInfo::
1190 : loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1191 : unsigned DestReg, int FI,
1192 : const TargetRegisterClass *RC,
1193 : const TargetRegisterInfo *TRI) const {
1194 2029 : DebugLoc DL;
1195 2029 : if (I != MBB.end()) DL = I->getDebugLoc();
1196 2029 : MachineFunction &MF = *MBB.getParent();
1197 2029 : MachineFrameInfo &MFI = MF.getFrameInfo();
1198 : unsigned Align = MFI.getObjectAlignment(FI);
1199 2029 : MachineMemOperand *MMO = MF.getMachineMemOperand(
1200 : MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
1201 : MFI.getObjectSize(FI), Align);
1202 :
1203 2029 : switch (TRI->getSpillSize(*RC)) {
1204 2 : case 2:
1205 4 : if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1206 4 : BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1207 : .addFrameIndex(FI)
1208 : .addImm(0)
1209 : .addMemOperand(MMO)
1210 2 : .add(predOps(ARMCC::AL));
1211 : } else
1212 0 : llvm_unreachable("Unknown reg class!");
1213 2 : break;
1214 1202 : case 4:
1215 2404 : if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1216 2372 : BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1217 : .addFrameIndex(FI)
1218 : .addImm(0)
1219 : .addMemOperand(MMO)
1220 1186 : .add(predOps(ARMCC::AL));
1221 32 : } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1222 32 : BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1223 : .addFrameIndex(FI)
1224 : .addImm(0)
1225 : .addMemOperand(MMO)
1226 16 : .add(predOps(ARMCC::AL));
1227 : } else
1228 0 : llvm_unreachable("Unknown reg class!");
1229 : break;
1230 435 : case 8:
1231 870 : if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1232 862 : BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1233 : .addFrameIndex(FI)
1234 : .addImm(0)
1235 : .addMemOperand(MMO)
1236 431 : .add(predOps(ARMCC::AL));
1237 8 : } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1238 4 : MachineInstrBuilder MIB;
1239 :
1240 4 : if (Subtarget.hasV5TEOps()) {
1241 4 : MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1242 2 : AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1243 2 : AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1244 2 : MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1245 2 : .add(predOps(ARMCC::AL));
1246 : } else {
1247 : // Fallback to LDM instruction, which has existed since the dawn of
1248 : // time.
1249 4 : MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1250 : .addFrameIndex(FI)
1251 : .addMemOperand(MMO)
1252 2 : .add(predOps(ARMCC::AL));
1253 2 : MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1254 2 : MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1255 : }
1256 :
1257 4 : if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1258 0 : MIB.addReg(DestReg, RegState::ImplicitDefine);
1259 : } else
1260 0 : llvm_unreachable("Unknown reg class!");
1261 : break;
1262 387 : case 16:
1263 774 : if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1264 387 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1265 770 : BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1266 : .addFrameIndex(FI)
1267 : .addImm(16)
1268 : .addMemOperand(MMO)
1269 385 : .add(predOps(ARMCC::AL));
1270 : } else {
1271 4 : BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1272 : .addFrameIndex(FI)
1273 : .addMemOperand(MMO)
1274 2 : .add(predOps(ARMCC::AL));
1275 : }
1276 : } else
1277 0 : llvm_unreachable("Unknown reg class!");
1278 : break;
1279 1 : case 24:
1280 2 : if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1281 1 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1282 0 : BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1283 : .addFrameIndex(FI)
1284 : .addImm(16)
1285 : .addMemOperand(MMO)
1286 0 : .add(predOps(ARMCC::AL));
1287 : } else {
1288 2 : MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1289 : .addFrameIndex(FI)
1290 : .addMemOperand(MMO)
1291 1 : .add(predOps(ARMCC::AL));
1292 1 : MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1293 1 : MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1294 1 : MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1295 1 : if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1296 0 : MIB.addReg(DestReg, RegState::ImplicitDefine);
1297 : }
1298 : } else
1299 0 : llvm_unreachable("Unknown reg class!");
1300 : break;
1301 0 : case 32:
1302 0 : if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1303 0 : if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1304 0 : BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1305 : .addFrameIndex(FI)
1306 : .addImm(16)
1307 : .addMemOperand(MMO)
1308 0 : .add(predOps(ARMCC::AL));
1309 : } else {
1310 0 : MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1311 : .addFrameIndex(FI)
1312 0 : .add(predOps(ARMCC::AL))
1313 0 : .addMemOperand(MMO);
1314 0 : MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1315 0 : MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1316 0 : MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1317 0 : MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1318 0 : if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1319 0 : MIB.addReg(DestReg, RegState::ImplicitDefine);
1320 : }
1321 : } else
1322 0 : llvm_unreachable("Unknown reg class!");
1323 : break;
1324 2 : case 64:
1325 4 : if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1326 4 : MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1327 : .addFrameIndex(FI)
1328 2 : .add(predOps(ARMCC::AL))
1329 2 : .addMemOperand(MMO);
1330 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1331 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1332 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1333 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1334 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1335 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1336 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1337 2 : MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1338 2 : if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1339 0 : MIB.addReg(DestReg, RegState::ImplicitDefine);
1340 : } else
1341 0 : llvm_unreachable("Unknown reg class!");
1342 2 : break;
1343 0 : default:
1344 0 : llvm_unreachable("Unknown regclass!");
1345 : }
1346 2029 : }
1347 :
1348 35132 : unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1349 : int &FrameIndex) const {
1350 70264 : switch (MI.getOpcode()) {
1351 : default: break;
1352 159 : case ARM::LDRrs:
1353 : case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1354 159 : if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1355 159 : MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1356 0 : MI.getOperand(3).getImm() == 0) {
1357 0 : FrameIndex = MI.getOperand(1).getIndex();
1358 0 : return MI.getOperand(0).getReg();
1359 : }
1360 : break;
1361 8260 : case ARM::LDRi12:
1362 : case ARM::t2LDRi12:
1363 : case ARM::tLDRspi:
1364 : case ARM::VLDRD:
1365 : case ARM::VLDRS:
1366 16520 : if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1367 4745 : MI.getOperand(2).getImm() == 0) {
1368 2963 : FrameIndex = MI.getOperand(1).getIndex();
1369 2963 : return MI.getOperand(0).getReg();
1370 : }
1371 : break;
1372 8 : case ARM::VLD1q64:
1373 : case ARM::VLD1d8TPseudo:
1374 : case ARM::VLD1d16TPseudo:
1375 : case ARM::VLD1d32TPseudo:
1376 : case ARM::VLD1d64TPseudo:
1377 : case ARM::VLD1d8QPseudo:
1378 : case ARM::VLD1d16QPseudo:
1379 : case ARM::VLD1d32QPseudo:
1380 : case ARM::VLD1d64QPseudo:
1381 16 : if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1382 2 : FrameIndex = MI.getOperand(1).getIndex();
1383 2 : return MI.getOperand(0).getReg();
1384 : }
1385 : break;
1386 0 : case ARM::VLDMQIA:
1387 0 : if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1388 0 : FrameIndex = MI.getOperand(1).getIndex();
1389 0 : return MI.getOperand(0).getReg();
1390 : }
1391 : break;
1392 : }
1393 :
1394 : return 0;
1395 : }
1396 :
1397 139283 : unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1398 : int &FrameIndex) const {
1399 : SmallVector<const MachineMemOperand *, 1> Accesses;
1400 139283 : if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
1401 3398 : FrameIndex =
1402 3398 : cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1403 3398 : ->getFrameIndex();
1404 3398 : return true;
1405 : }
1406 : return false;
1407 : }
1408 :
1409 : /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1410 : /// depending on whether the result is used.
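/// A sketch of the expansion (illustrative): a MEMCPY with four scratch
/// registers whose base-register updates are still live becomes roughly
///   LDMIA_UPD r1!, {r4, r5, r6, r7}
///   STMIA_UPD r0!, {r4, r5, r6, r7}
/// while dead updates use the plain LDMIA/STMIA forms instead.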
1411 56 : void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1412 56 : bool isThumb1 = Subtarget.isThumb1Only();
1413 : bool isThumb2 = Subtarget.isThumb2();
1414 56 : const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1415 :
1416 : DebugLoc dl = MI->getDebugLoc();
1417 56 : MachineBasicBlock *BB = MI->getParent();
1418 :
1419 56 : MachineInstrBuilder LDM, STM;
1420 56 : if (isThumb1 || !MI->getOperand(1).isDead()) {
1421 41 : MachineOperand LDWb(MI->getOperand(1));
1422 75 : LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1423 : : isThumb1 ? ARM::tLDMIA_UPD
1424 109 : : ARM::LDMIA_UPD))
1425 : .add(LDWb);
1426 : } else {
1427 31 : LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1428 : }
1429 :
1430 56 : if (isThumb1 || !MI->getOperand(0).isDead()) {
1431 41 : MachineOperand STWb(MI->getOperand(0));
1432 75 : STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1433 : : isThumb1 ? ARM::tSTMIA_UPD
1434 109 : : ARM::STMIA_UPD))
1435 : .add(STWb);
1436 : } else {
1437 31 : STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1438 : }
1439 :
1440 56 : MachineOperand LDBase(MI->getOperand(3));
1441 56 : LDM.add(LDBase).add(predOps(ARMCC::AL));
1442 :
1443 56 : MachineOperand STBase(MI->getOperand(2));
1444 56 : STM.add(STBase).add(predOps(ARMCC::AL));
1445 :
1446 : // Sort the scratch registers into ascending order.
1447 56 : const TargetRegisterInfo &TRI = getRegisterInfo();
1448 : SmallVector<unsigned, 6> ScratchRegs;
1449 285 : for(unsigned I = 5; I < MI->getNumOperands(); ++I)
1450 458 : ScratchRegs.push_back(MI->getOperand(I).getReg());
1451 : llvm::sort(ScratchRegs,
1452 : [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1453 173 : return TRI.getEncodingValue(Reg1) <
1454 0 : TRI.getEncodingValue(Reg2);
1455 : });
1456 :
1457 285 : for (const auto &Reg : ScratchRegs) {
1458 229 : LDM.addReg(Reg, RegState::Define);
1459 229 : STM.addReg(Reg, RegState::Kill);
1460 : }
1461 :
1462 56 : BB->erase(MI);
1463 56 : }
1464 :
1465 51906 : bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1466 103812 : if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1467 : assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
1468 : "LOAD_STACK_GUARD currently supported only for MachO.");
1469 120 : expandLoadStackGuard(MI);
1470 120 : MI.getParent()->erase(MI);
1471 60 : return true;
1472 : }
1473 :
1474 51846 : if (MI.getOpcode() == ARM::MEMCPY) {
1475 56 : expandMEMCPY(MI);
1476 56 : return true;
1477 : }
1478 :
1479 : // This hook gets to expand COPY instructions before they become
1480 : // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1481 : // widened to VMOVD. We prefer the VMOVD when possible because it may be
1482 : // changed into a VORR that can go down the NEON pipeline.
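// Illustrative sketch (not from the source): a copy between even S-registers
// such as
//   %s0 = COPY %s2
// is rewritten into
//   %d0 = VMOVD undef %d1, implicit %s2
// where the implicit use of %s2 tells the verifier which half of the undef
// D-register actually carries a well-defined value.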
1483 51790 : if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
1484 : return false;
1485 :
1486 : // Look for a copy between even S-registers. That is where we keep floats
1487 : // when using NEON v2f32 instructions for f32 arithmetic.
1488 11766 : unsigned DstRegS = MI.getOperand(0).getReg();
1489 11766 : unsigned SrcRegS = MI.getOperand(1).getReg();
1490 23532 : if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1491 : return false;
1492 :
1493 1349 : const TargetRegisterInfo *TRI = &getRegisterInfo();
1494 1349 : unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1495 : &ARM::DPRRegClass);
1496 1349 : unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1497 : &ARM::DPRRegClass);
1498 1349 : if (!DstRegD || !SrcRegD)
1499 : return false;
1500 :
1501 : // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1502 : // legal if the COPY already defines the full DstRegD, and it isn't a
1503 : // sub-register insertion.
1504 858 : if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1505 835 : return false;
1506 :
1507 : // A dead copy shouldn't show up here, but reject it just in case.
1508 16 : if (MI.getOperand(0).isDead())
1509 : return false;
1510 :
1511 : // All clear, widen the COPY.
1512 : LLVM_DEBUG(dbgs() << "widening: " << MI);
1513 8 : MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1514 :
1515 : // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1516 : // or some other super-register.
1517 8 : int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
1518 8 : if (ImpDefIdx != -1)
1519 1 : MI.RemoveOperand(ImpDefIdx);
1520 :
1521 : // Change the opcode and operands.
1522 8 : MI.setDesc(get(ARM::VMOVD));
1523 8 : MI.getOperand(0).setReg(DstRegD);
1524 16 : MI.getOperand(1).setReg(SrcRegD);
1525 8 : MIB.add(predOps(ARMCC::AL));
1526 :
1527 : // We are now reading SrcRegD instead of SrcRegS. This may upset the
1528 : // register scavenger and machine verifier, so we need to indicate that we
1529 : // are reading an undefined value from SrcRegD, but a proper value from
1530 : // SrcRegS.
1531 8 : MI.getOperand(1).setIsUndef();
1532 8 : MIB.addReg(SrcRegS, RegState::Implicit);
1533 :
1534 : // SrcRegD may actually contain an unrelated value in the ssub_1
1535 : // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1536 16 : if (MI.getOperand(1).isKill()) {
1537 : MI.getOperand(1).setIsKill(false);
1538 7 : MI.addRegisterKilled(SrcRegS, TRI, true);
1539 : }
1540 :
1541 : LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1542 : return true;
1543 : }
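// A minimal standalone sketch, not LLVM code, of the S- to D-register pairing
// the VMOVS widening above relies on: sN is the ssub_0 lane of d(N/2) only
// when N is even, so only even-to-even copies can become a VMOVD.
#include <cstdio>

// Returns the D-register index whose ssub_0 lane is sN, or -1 if none.
int matchingDReg(int SIdx) {
  return (SIdx % 2 == 0) ? SIdx / 2 : -1;
}

int main() {
  // "vmovs s0, s4" can widen to "vmovd d0, d2"; "vmovs s1, s4" cannot.
  std::printf("s0 -> d%d, s4 -> d%d, s1 -> d%d\n",
              matchingDReg(0), matchingDReg(4), matchingDReg(1));
  return 0;
}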
1544 :
1545             : /// Create a copy of a constant pool value. Update CPI to the new index and return
1546 : /// the label UID.
1547 0 : static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1548 0 : MachineConstantPool *MCP = MF.getConstantPool();
1549 0 : ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1550 :
1551 0 : const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1552 : assert(MCPE.isMachineConstantPoolEntry() &&
1553 : "Expecting a machine constantpool entry!");
1554 0 : ARMConstantPoolValue *ACPV =
1555 : static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1556 :
1557 : unsigned PCLabelId = AFI->createPICLabelUId();
1558 : ARMConstantPoolValue *NewCPV = nullptr;
1559 :
1560 : // FIXME: The below assumes PIC relocation model and that the function
1561 : // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1562             :   // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
1563 : // instructions, so that's probably OK, but is PIC always correct when
1564 : // we get here?
1565 0 : if (ACPV->isGlobalValue())
1566 0 : NewCPV = ARMConstantPoolConstant::Create(
1567 0 : cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1568 0 : 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1569 0 : else if (ACPV->isExtSymbol())
1570 : NewCPV = ARMConstantPoolSymbol::
1571 0 : Create(MF.getFunction().getContext(),
1572 : cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1573 0 : else if (ACPV->isBlockAddress())
1574 : NewCPV = ARMConstantPoolConstant::
1575 0 : Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1576 : ARMCP::CPBlockAddress, 4);
1577 0 : else if (ACPV->isLSDA())
1578 0 : NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1579 : ARMCP::CPLSDA, 4);
1580 0 : else if (ACPV->isMachineBasicBlock())
1581 : NewCPV = ARMConstantPoolMBB::
1582 0 : Create(MF.getFunction().getContext(),
1583 : cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1584 : else
1585 0 : llvm_unreachable("Unexpected ARM constantpool value type!!");
1586 0 : CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1587 0 : return PCLabelId;
1588 : }
1589 :
1590 2967 : void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1591 : MachineBasicBlock::iterator I,
1592 : unsigned DestReg, unsigned SubIdx,
1593 : const MachineInstr &Orig,
1594 : const TargetRegisterInfo &TRI) const {
1595 2967 : unsigned Opcode = Orig.getOpcode();
1596 2967 : switch (Opcode) {
1597 2967 : default: {
1598 2967 : MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1599 2967 : MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1600 : MBB.insert(I, MI);
1601 2967 : break;
1602 : }
1603 0 : case ARM::tLDRpci_pic:
1604 : case ARM::t2LDRpci_pic: {
1605 0 : MachineFunction &MF = *MBB.getParent();
1606 0 : unsigned CPI = Orig.getOperand(1).getIndex();
1607 0 : unsigned PCLabelId = duplicateCPV(MF, CPI);
1608 0 : BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1609 : .addConstantPoolIndex(CPI)
1610 0 : .addImm(PCLabelId)
1611 : .cloneMemRefs(Orig);
1612 : break;
1613 : }
1614 : }
1615 2967 : }
1616 :
1617 : MachineInstr &
1618 392 : ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1619 : MachineBasicBlock::iterator InsertBefore,
1620 : const MachineInstr &Orig) const {
1621 392 : MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1622 392 : MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1623 : for (;;) {
1624 792 : switch (I->getOpcode()) {
1625 0 : case ARM::tLDRpci_pic:
1626 : case ARM::t2LDRpci_pic: {
1627 0 : MachineFunction &MF = *MBB.getParent();
1628 0 : unsigned CPI = I->getOperand(1).getIndex();
1629 0 : unsigned PCLabelId = duplicateCPV(MF, CPI);
1630 0 : I->getOperand(1).setIndex(CPI);
1631 0 : I->getOperand(2).setImm(PCLabelId);
1632 : break;
1633 : }
1634 : }
1635 396 : if (!I->isBundledWithSucc())
1636 : break;
1637 : ++I;
1638 : }
1639 392 : return Cloned;
1640 : }
1641 :
1642 447 : bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1643 : const MachineInstr &MI1,
1644 : const MachineRegisterInfo *MRI) const {
1645 447 : unsigned Opcode = MI0.getOpcode();
1646 894 : if (Opcode == ARM::t2LDRpci ||
1647 447 : Opcode == ARM::t2LDRpci_pic ||
1648 892 : Opcode == ARM::tLDRpci ||
1649 446 : Opcode == ARM::tLDRpci_pic ||
1650 440 : Opcode == ARM::LDRLIT_ga_pcrel ||
1651 : Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1652 870 : Opcode == ARM::tLDRLIT_ga_pcrel ||
1653 435 : Opcode == ARM::MOV_ga_pcrel ||
1654 860 : Opcode == ARM::MOV_ga_pcrel_ldr ||
1655 430 : Opcode == ARM::t2MOV_ga_pcrel) {
1656 128 : if (MI1.getOpcode() != Opcode)
1657 : return false;
1658 64 : if (MI0.getNumOperands() != MI1.getNumOperands())
1659 : return false;
1660 :
1661 64 : const MachineOperand &MO0 = MI0.getOperand(1);
1662 64 : const MachineOperand &MO1 = MI1.getOperand(1);
1663 192 : if (MO0.getOffset() != MO1.getOffset())
1664 : return false;
1665 :
1666 128 : if (Opcode == ARM::LDRLIT_ga_pcrel ||
1667 64 : Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1668 64 : Opcode == ARM::tLDRLIT_ga_pcrel ||
1669 54 : Opcode == ARM::MOV_ga_pcrel ||
1670 53 : Opcode == ARM::MOV_ga_pcrel_ldr ||
1671 : Opcode == ARM::t2MOV_ga_pcrel)
1672 : // Ignore the PC labels.
1673 57 : return MO0.getGlobal() == MO1.getGlobal();
1674 :
1675 7 : const MachineFunction *MF = MI0.getParent()->getParent();
1676 7 : const MachineConstantPool *MCP = MF->getConstantPool();
1677 7 : int CPI0 = MO0.getIndex();
1678 7 : int CPI1 = MO1.getIndex();
1679 7 : const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1680 7 : const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1681 7 : bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1682 7 : bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1683 7 : if (isARMCP0 && isARMCP1) {
1684 1 : ARMConstantPoolValue *ACPV0 =
1685 : static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1686 1 : ARMConstantPoolValue *ACPV1 =
1687 : static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1688 1 : return ACPV0->hasSameValue(ACPV1);
1689 6 : } else if (!isARMCP0 && !isARMCP1) {
1690 6 : return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1691 : }
1692 : return false;
1693 383 : } else if (Opcode == ARM::PICLDR) {
1694 0 : if (MI1.getOpcode() != Opcode)
1695 : return false;
1696 0 : if (MI0.getNumOperands() != MI1.getNumOperands())
1697 : return false;
1698 :
1699 0 : unsigned Addr0 = MI0.getOperand(1).getReg();
1700 0 : unsigned Addr1 = MI1.getOperand(1).getReg();
1701 0 : if (Addr0 != Addr1) {
1702 0 : if (!MRI ||
1703 0 : !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1704 : !TargetRegisterInfo::isVirtualRegister(Addr1))
1705 : return false;
1706 :
1707 : // This assumes SSA form.
1708 0 : MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1709 0 : MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1710             :       // Check if the loaded values, e.g. a constant pool entry for a global
1711             :       // address, are the same.
1712 0 : if (!produceSameValue(*Def0, *Def1, MRI))
1713 : return false;
1714 : }
1715 :
1716 0 : for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1717 : // %12 = PICLDR %11, 0, 14, %noreg
1718 0 : const MachineOperand &MO0 = MI0.getOperand(i);
1719 0 : const MachineOperand &MO1 = MI1.getOperand(i);
1720 0 : if (!MO0.isIdenticalTo(MO1))
1721 : return false;
1722 : }
1723 : return true;
1724 : }
1725 :
1726 383 : return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1727 : }
1728 :
1729 : /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1730 : /// determine if two loads are loading from the same base address. It should
1731 : /// only return true if the base pointers are the same and the only differences
1732             : /// only return true if the base pointers are the same and the only difference
1733             : /// between the two addresses is the offset. It also returns the offsets by
1734 : ///
1735 : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1736 : /// is permanently disabled.
1737 150571 : bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1738 : int64_t &Offset1,
1739 : int64_t &Offset2) const {
1740 : // Don't worry about Thumb: just ARM and Thumb2.
1741 150571 : if (Subtarget.isThumb1Only()) return false;
1742 :
1743 131191 : if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1744 : return false;
1745 :
1746 94071 : switch (Load1->getMachineOpcode()) {
1747 : default:
1748 : return false;
1749 : case ARM::LDRi12:
1750 : case ARM::LDRBi12:
1751 : case ARM::LDRD:
1752 : case ARM::LDRH:
1753 : case ARM::LDRSB:
1754 : case ARM::LDRSH:
1755 : case ARM::VLDRD:
1756 : case ARM::VLDRS:
1757 : case ARM::t2LDRi8:
1758 : case ARM::t2LDRBi8:
1759 : case ARM::t2LDRDi8:
1760 : case ARM::t2LDRSHi8:
1761 : case ARM::t2LDRi12:
1762 : case ARM::t2LDRBi12:
1763 : case ARM::t2LDRSHi12:
1764 : break;
1765 : }
1766 :
1767 73567 : switch (Load2->getMachineOpcode()) {
1768 : default:
1769 : return false;
1770 : case ARM::LDRi12:
1771 : case ARM::LDRBi12:
1772 : case ARM::LDRD:
1773 : case ARM::LDRH:
1774 : case ARM::LDRSB:
1775 : case ARM::LDRSH:
1776 : case ARM::VLDRD:
1777 : case ARM::VLDRS:
1778 : case ARM::t2LDRi8:
1779 : case ARM::t2LDRBi8:
1780 : case ARM::t2LDRSHi8:
1781 : case ARM::t2LDRi12:
1782 : case ARM::t2LDRBi12:
1783 : case ARM::t2LDRSHi12:
1784 : break;
1785 : }
1786 :
1787 : // Check if base addresses and chain operands match.
1788 65541 : if (Load1->getOperand(0) != Load2->getOperand(0) ||
1789 : Load1->getOperand(4) != Load2->getOperand(4))
1790 : return false;
1791 :
1792 : // Index should be Reg0.
1793 : if (Load1->getOperand(3) != Load2->getOperand(3))
1794 : return false;
1795 :
1796 : // Determine the offsets.
1797 : if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1798 : isa<ConstantSDNode>(Load2->getOperand(1))) {
1799 14825 : Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1800 29650 : Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1801 14825 : return true;
1802 : }
1803 :
1804 : return false;
1805 : }
1806 :
1807             : /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1808             : /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1809             : /// be scheduled together. On some targets, if two loads are loading from
1810 : /// addresses in the same cache line, it's better if they are scheduled
1811 : /// together. This function takes two integers that represent the load offsets
1812 : /// from the common base address. It returns true if it decides it's desirable
1813 : /// to schedule the two loads together. "NumLoads" is the number of loads that
1814 : /// have already been scheduled after Load1.
1815 : ///
1816 : /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1817 : /// is permanently disabled.
1818 3197 : bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1819 : int64_t Offset1, int64_t Offset2,
1820 : unsigned NumLoads) const {
1821 : // Don't worry about Thumb: just ARM and Thumb2.
1822 3197 : if (Subtarget.isThumb1Only()) return false;
1823 :
1824 : assert(Offset2 > Offset1);
1825 :
1826 3197 : if ((Offset2 - Offset1) / 8 > 64)
1827 : return false;
1828 :
1829             :   // Check if the machine opcodes are different. If they are different
1830             :   // then we consider them to not share the same base address,
1831             :   // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
1832             :   // other LDRBi12. In this case they are considered the same because they
1833             :   // are different encoding forms of the same basic instruction.
1834 3197 : if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1835 24 : !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1836 : Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1837 2 : (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1838 : Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1839 : return false; // FIXME: overly conservative?
1840 :
1841 : // Four loads in a row should be sufficient.
1842 3176 : if (NumLoads >= 3)
1843 486 : return false;
1844 :
1845 : return true;
1846 : }
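// A minimal standalone sketch, not LLVM code, mirroring the two checks above:
// loads qualify as "near" only when their offsets are at most 512 bytes apart
// and fewer than three loads have already been scheduled after the first one.
#include <cstdint>
#include <cstdio>

bool shouldScheduleNear(int64_t Offset1, int64_t Offset2, unsigned NumLoads) {
  if ((Offset2 - Offset1) / 8 > 64)
    return false; // More than 512 bytes apart.
  if (NumLoads >= 3)
    return false; // Four loads in a row are already enough.
  return true;
}

int main() {
  std::printf("%d %d %d\n",
              shouldScheduleNear(0, 16, 0),   // 1: close together
              shouldScheduleNear(0, 1024, 0), // 0: 1KB apart
              shouldScheduleNear(0, 16, 3));  // 0: enough loads already
  return 0;
}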
1847 :
1848 101339 : bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1849 : const MachineBasicBlock *MBB,
1850 : const MachineFunction &MF) const {
1851 : // Debug info is never a scheduling boundary. It's necessary to be explicit
1852 : // due to the special treatment of IT instructions below, otherwise a
1853 : // dbg_value followed by an IT will result in the IT instruction being
1854 : // considered a scheduling hazard, which is wrong. It should be the actual
1855 : // instruction preceding the dbg_value instruction(s), just like it is
1856 : // when debug info is not present.
1857 : if (MI.isDebugInstr())
1858 : return false;
1859 :
1860 : // Terminators and labels can't be scheduled around.
1861 101212 : if (MI.isTerminator() || MI.isPosition())
1862 : return true;
1863 :
1864 : // Treat the start of the IT block as a scheduling boundary, but schedule
1865 : // t2IT along with all instructions following it.
1866 : // FIXME: This is a big hammer. But the alternative is to add all potential
1867 : // true and anti dependencies to IT block instructions as implicit operands
1868 : // to the t2IT instruction. The added compile time and complexity does not
1869 : // seem worth it.
1870 : MachineBasicBlock::const_iterator I = MI;
1871 : // Make sure to skip any debug instructions
1872 74065 : while (++I != MBB->end() && I->isDebugInstr())
1873 : ;
1874 74016 : if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1875 : return true;
1876 :
1877 : // Don't attempt to schedule around any instruction that defines
1878 : // a stack-oriented pointer, as it's unlikely to be profitable. This
1879 : // saves compile time, because it doesn't require every single
1880 : // stack slot reference to depend on the instruction that does the
1881 : // modification.
1882 : // Calls don't actually change the stack pointer, even if they have imp-defs.
1883 : // No ARM calling conventions change the stack pointer. (X86 calling
1884 : // conventions sometimes do).
1885 148032 : if (!MI.isCall() && MI.definesRegister(ARM::SP))
1886 7219 : return true;
1887 :
1888 : return false;
1889 : }
1890 :
1891 1487 : bool ARMBaseInstrInfo::
1892 : isProfitableToIfCvt(MachineBasicBlock &MBB,
1893 : unsigned NumCycles, unsigned ExtraPredCycles,
1894 : BranchProbability Probability) const {
1895 1487 : if (!NumCycles)
1896 : return false;
1897 :
1898 : // If we are optimizing for size, see if the branch in the predecessor can be
1899 : // lowered to cbn?z by the constant island lowering pass, and return false if
1900 : // so. This results in a shorter instruction sequence.
1901 1487 : if (MBB.getParent()->getFunction().optForSize()) {
1902 127 : MachineBasicBlock *Pred = *MBB.pred_begin();
1903 127 : if (!Pred->empty()) {
1904 : MachineInstr *LastMI = &*Pred->rbegin();
1905 254 : if (LastMI->getOpcode() == ARM::t2Bcc) {
1906 : MachineBasicBlock::iterator CmpMI = LastMI;
1907 118 : if (CmpMI != Pred->begin()) {
1908 : --CmpMI;
1909 236 : if (CmpMI->getOpcode() == ARM::tCMPi8 ||
1910 : CmpMI->getOpcode() == ARM::t2CMPri) {
1911 85 : unsigned Reg = CmpMI->getOperand(0).getReg();
1912 85 : unsigned PredReg = 0;
1913 85 : ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
1914 85 : if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
1915 : isARMLowRegister(Reg))
1916 43 : return false;
1917 : }
1918 : }
1919 : }
1920 : }
1921 : }
1922 1444 : return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1923 1444 : MBB, 0, 0, Probability);
1924 : }
1925 :
1926 1550 : bool ARMBaseInstrInfo::
1927 : isProfitableToIfCvt(MachineBasicBlock &TBB,
1928 : unsigned TCycles, unsigned TExtra,
1929 : MachineBasicBlock &FBB,
1930 : unsigned FCycles, unsigned FExtra,
1931 : BranchProbability Probability) const {
1932 1550 : if (!TCycles)
1933 : return false;
1934 :
1935 : // Attempt to estimate the relative costs of predication versus branching.
1936 : // Here we scale up each component of UnpredCost to avoid precision issue when
1937 : // scaling TCycles/FCycles by Probability.
1938 : const unsigned ScalingUpFactor = 1024;
1939 :
1940 1550 : unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1941 : unsigned UnpredCost;
1942 1550 : if (!Subtarget.hasBranchPredictor()) {
1943 : // When we don't have a branch predictor it's always cheaper to not take a
1944 : // branch than take it, so we have to take that into account.
1945 : unsigned NotTakenBranchCost = 1;
1946 24 : unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1947 : unsigned TUnpredCycles, FUnpredCycles;
1948 24 : if (!FCycles) {
1949 : // Triangle: TBB is the fallthrough
1950 20 : TUnpredCycles = TCycles + NotTakenBranchCost;
1951 : FUnpredCycles = TakenBranchCost;
1952 : } else {
1953 : // Diamond: TBB is the block that is branched to, FBB is the fallthrough
1954 4 : TUnpredCycles = TCycles + TakenBranchCost;
1955 4 : FUnpredCycles = FCycles + NotTakenBranchCost;
1956 : // The branch at the end of FBB will disappear when it's predicated, so
1957 : // discount it from PredCost.
1958 4 : PredCost -= 1 * ScalingUpFactor;
1959 : }
1960             :     // The total cost is the cost of each path scaled by its probability.
1961 24 : unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
1962 48 : unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
1963 24 : UnpredCost = TUnpredCost + FUnpredCost;
1964             :     // When predicating, assume that the first IT can be folded away but later
1965             :     // ones cost one cycle each.
1966 48 : if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
1967 5 : PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
1968 : }
1969 : } else {
1970 1526 : unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1971 : unsigned FUnpredCost =
1972 3052 : Probability.getCompl().scale(FCycles * ScalingUpFactor);
1973 1526 : UnpredCost = TUnpredCost + FUnpredCost;
1974 1526 : UnpredCost += 1 * ScalingUpFactor; // The branch itself
1975 1526 : UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1976 : }
1977 :
1978 1550 : return PredCost <= UnpredCost;
1979 : }
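// A minimal standalone sketch, not LLVM code, of the cost comparison above in
// the has-branch-predictor path. BranchProbability is modelled as a plain
// double, and the misprediction penalty is a made-up example value.
#include <cstdio>

int main() {
  const unsigned ScalingUpFactor = 1024;
  unsigned TCycles = 1, FCycles = 1; // One cycle on each side of the diamond.
  double TakenProb = 0.5;            // Probability of the true side.
  unsigned MispredictPenalty = 10;   // Assumed subtarget value.

  unsigned PredCost = (TCycles + FCycles) * ScalingUpFactor;
  unsigned UnpredCost =
      (unsigned)(TakenProb * TCycles * ScalingUpFactor) +
      (unsigned)((1.0 - TakenProb) * FCycles * ScalingUpFactor);
  UnpredCost += 1 * ScalingUpFactor;                      // The branch itself.
  UnpredCost += MispredictPenalty * ScalingUpFactor / 10;

  // 2048 <= 3072: predicating the two single-cycle blocks beats branching.
  std::printf("PredCost=%u UnpredCost=%u -> ifcvt=%d\n", PredCost, UnpredCost,
              PredCost <= UnpredCost);
  return 0;
}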
1980 :
1981 : bool
1982 73 : ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1983 : MachineBasicBlock &FMBB) const {
1984 : // Reduce false anti-dependencies to let the target's out-of-order execution
1985 : // engine do its thing.
1986 73 : return Subtarget.isProfitableToUnpredicate();
1987 : }
1988 :
1989 : /// getInstrPredicate - If instruction is predicated, returns its predicate
1990 : /// condition, otherwise returns AL. It also returns the condition code
1991 : /// register by reference.
1992 93866 : ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
1993 : unsigned &PredReg) {
1994 93866 : int PIdx = MI.findFirstPredOperandIdx();
1995 93866 : if (PIdx == -1) {
1996 14087 : PredReg = 0;
1997 14087 : return ARMCC::AL;
1998 : }
1999 :
2000 79779 : PredReg = MI.getOperand(PIdx+1).getReg();
2001 159558 : return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2002 : }
2003 :
2004 0 : unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2005 0 : if (Opc == ARM::B)
2006 : return ARM::Bcc;
2007 0 : if (Opc == ARM::tB)
2008 : return ARM::tBcc;
2009 0 : if (Opc == ARM::t2B)
2010 : return ARM::t2Bcc;
2011 :
2012 0 : llvm_unreachable("Unknown unconditional branch opcode!");
2013 : }
2014 :
2015 10608 : MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2016 : bool NewMI,
2017 : unsigned OpIdx1,
2018 : unsigned OpIdx2) const {
2019 21216 : switch (MI.getOpcode()) {
2020 1011 : case ARM::MOVCCr:
2021 : case ARM::t2MOVCCr: {
2022 : // MOVCC can be commuted by inverting the condition.
2023 1011 : unsigned PredReg = 0;
2024 1011 : ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2025 : // MOVCC AL can't be inverted. Shouldn't happen.
2026 1011 : if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2027 : return nullptr;
2028 : MachineInstr *CommutedMI =
2029 1011 : TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2030 1011 : if (!CommutedMI)
2031 : return nullptr;
2032 : // After swapping the MOVCC operands, also invert the condition.
2033 1011 : CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2034 1011 : .setImm(ARMCC::getOppositeCondition(CC));
2035 1011 : return CommutedMI;
2036 : }
2037 : }
2038 9597 : return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2039 : }
2040 :
2041 : /// Identify instructions that can be folded into a MOVCC instruction, and
2042 : /// return the defining instruction.
2043 0 : static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
2044 : const MachineRegisterInfo &MRI,
2045 : const TargetInstrInfo *TII) {
2046 0 : if (!TargetRegisterInfo::isVirtualRegister(Reg))
2047 0 : return nullptr;
2048 0 : if (!MRI.hasOneNonDBGUse(Reg))
2049 0 : return nullptr;
2050 0 : MachineInstr *MI = MRI.getVRegDef(Reg);
2051 0 : if (!MI)
2052 0 : return nullptr;
2053 : // MI is folded into the MOVCC by predicating it.
2054 0 : if (!MI->isPredicable())
2055 0 : return nullptr;
2056 : // Check if MI has any non-dead defs or physreg uses. This also detects
2057 : // predicated instructions which will be reading CPSR.
2058 0 : for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
2059 0 : const MachineOperand &MO = MI->getOperand(i);
2060 : // Reject frame index operands, PEI can't handle the predicated pseudos.
2061 0 : if (MO.isFI() || MO.isCPI() || MO.isJTI())
2062 0 : return nullptr;
2063 0 : if (!MO.isReg())
2064 0 : continue;
2065 : // MI can't have any tied operands, that would conflict with predication.
2066 0 : if (MO.isTied())
2067 0 : return nullptr;
2068 0 : if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
2069 0 : return nullptr;
2070 0 : if (MO.isDef() && !MO.isDead())
2071 0 : return nullptr;
2072 : }
2073 0 : bool DontMoveAcrossStores = true;
2074 0 : if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
2075 0 : return nullptr;
2076 : return MI;
2077 : }
2078 :
2079 467 : bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2080 : SmallVectorImpl<MachineOperand> &Cond,
2081 : unsigned &TrueOp, unsigned &FalseOp,
2082 : bool &Optimizable) const {
2083 : assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2084 : "Unknown select instruction");
2085 : // MOVCC operands:
2086 : // 0: Def.
2087 : // 1: True use.
2088 : // 2: False use.
2089 : // 3: Condition code.
2090 : // 4: CPSR use.
2091 467 : TrueOp = 1;
2092 467 : FalseOp = 2;
2093 934 : Cond.push_back(MI.getOperand(3));
2094 934 : Cond.push_back(MI.getOperand(4));
2095 : // We can always fold a def.
2096 467 : Optimizable = true;
2097 467 : return false;
2098 : }
2099 :
2100 : MachineInstr *
2101 467 : ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2102 : SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2103 : bool PreferFalse) const {
2104 : assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2105 : "Unknown select instruction");
2106 467 : MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2107 467 : MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2108 : bool Invert = !DefMI;
2109 467 : if (!DefMI)
2110 397 : DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2111 467 : if (!DefMI)
2112 : return nullptr;
2113 :
2114 : // Find new register class to use.
2115 238 : MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2116 168 : unsigned DestReg = MI.getOperand(0).getReg();
2117 168 : const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
2118 168 : if (!MRI.constrainRegClass(DestReg, PreviousClass))
2119 : return nullptr;
2120 :
2121 : // Create a new predicated version of DefMI.
2122 : // Rfalse is the first use.
2123 : MachineInstrBuilder NewMI =
2124 336 : BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2125 :
2126 : // Copy all the DefMI operands, excluding its (null) predicate.
2127 168 : const MCInstrDesc &DefDesc = DefMI->getDesc();
2128 452 : for (unsigned i = 1, e = DefDesc.getNumOperands();
2129 452 : i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
2130 284 : NewMI.add(DefMI->getOperand(i));
2131 :
2132 168 : unsigned CondCode = MI.getOperand(3).getImm();
2133 168 : if (Invert)
2134 98 : NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2135 : else
2136 : NewMI.addImm(CondCode);
2137 168 : NewMI.add(MI.getOperand(4));
2138 :
2139 : // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2140 168 : if (NewMI->hasOptionalDef())
2141 156 : NewMI.add(condCodeOp());
2142 :
2143 : // The output register value when the predicate is false is an implicit
2144 : // register operand tied to the first def.
2145 : // The tie makes the register allocator ensure the FalseReg is allocated the
2146 : // same register as operand 0.
2147 : FalseReg.setImplicit();
2148 : NewMI.add(FalseReg);
2149 168 : NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2150 :
2151 : // Update SeenMIs set: register newly created MI and erase removed DefMI.
2152 168 : SeenMIs.insert(NewMI);
2153 : SeenMIs.erase(DefMI);
2154 :
2155 : // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2156             :   // DefMI would be invalid when transferred inside the loop. Checking for a
2157 : // loop is expensive, but at least remove kill flags if they are in different
2158 : // BBs.
2159 168 : if (DefMI->getParent() != MI.getParent())
2160 17 : NewMI->clearKillInfo();
2161 :
2162 : // The caller will erase MI, but not DefMI.
2163 168 : DefMI->eraseFromParent();
2164 168 : return NewMI;
2165 : }
2166 :
2167 : /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2168 : /// instruction is encoded with an 'S' bit is determined by the optional CPSR
2169 : /// def operand.
2170 : ///
2171 : /// This will go away once we can teach tblgen how to set the optional CPSR def
2172 : /// operand itself.
2173 : struct AddSubFlagsOpcodePair {
2174 : uint16_t PseudoOpc;
2175 : uint16_t MachineOpc;
2176 : };
2177 :
2178 : static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2179 : {ARM::ADDSri, ARM::ADDri},
2180 : {ARM::ADDSrr, ARM::ADDrr},
2181 : {ARM::ADDSrsi, ARM::ADDrsi},
2182 : {ARM::ADDSrsr, ARM::ADDrsr},
2183 :
2184 : {ARM::SUBSri, ARM::SUBri},
2185 : {ARM::SUBSrr, ARM::SUBrr},
2186 : {ARM::SUBSrsi, ARM::SUBrsi},
2187 : {ARM::SUBSrsr, ARM::SUBrsr},
2188 :
2189 : {ARM::RSBSri, ARM::RSBri},
2190 : {ARM::RSBSrsi, ARM::RSBrsi},
2191 : {ARM::RSBSrsr, ARM::RSBrsr},
2192 :
2193 : {ARM::tADDSi3, ARM::tADDi3},
2194 : {ARM::tADDSi8, ARM::tADDi8},
2195 : {ARM::tADDSrr, ARM::tADDrr},
2196 : {ARM::tADCS, ARM::tADC},
2197 :
2198 : {ARM::tSUBSi3, ARM::tSUBi3},
2199 : {ARM::tSUBSi8, ARM::tSUBi8},
2200 : {ARM::tSUBSrr, ARM::tSUBrr},
2201 : {ARM::tSBCS, ARM::tSBC},
2202 :
2203 : {ARM::t2ADDSri, ARM::t2ADDri},
2204 : {ARM::t2ADDSrr, ARM::t2ADDrr},
2205 : {ARM::t2ADDSrs, ARM::t2ADDrs},
2206 :
2207 : {ARM::t2SUBSri, ARM::t2SUBri},
2208 : {ARM::t2SUBSrr, ARM::t2SUBrr},
2209 : {ARM::t2SUBSrs, ARM::t2SUBrs},
2210 :
2211 : {ARM::t2RSBSri, ARM::t2RSBri},
2212 : {ARM::t2RSBSrs, ARM::t2RSBrs},
2213 : };
2214 :
2215 1315052 : unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2216 36811350 : for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
2217 35496951 : if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
2218 653 : return AddSubFlagsOpcodeMap[i].MachineOpc;
2219 : return 0;
2220 : }
2221 :
2222 2953 : void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2223 : MachineBasicBlock::iterator &MBBI,
2224 : const DebugLoc &dl, unsigned DestReg,
2225 : unsigned BaseReg, int NumBytes,
2226 : ARMCC::CondCodes Pred, unsigned PredReg,
2227 : const ARMBaseInstrInfo &TII,
2228 : unsigned MIFlags) {
2229 2953 : if (NumBytes == 0 && DestReg != BaseReg) {
2230 616 : BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2231 308 : .addReg(BaseReg, RegState::Kill)
2232 308 : .add(predOps(Pred, PredReg))
2233 308 : .add(condCodeOp())
2234 : .setMIFlags(MIFlags);
2235 308 : return;
2236 : }
2237 :
2238 : bool isSub = NumBytes < 0;
2239 2645 : if (isSub) NumBytes = -NumBytes;
2240 :
2241 5346 : while (NumBytes) {
2242 2701 : unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2243 2701 : unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
2244 : assert(ThisVal && "Didn't extract field correctly");
2245 :
2246 : // We will handle these bits from offset, clear them.
2247 2701 : NumBytes &= ~ThisVal;
2248 :
2249 : assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2250 :
2251 : // Build the new ADD / SUB.
2252 2701 : unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2253 5402 : BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2254 2701 : .addReg(BaseReg, RegState::Kill)
2255 2701 : .addImm(ThisVal)
2256 2701 : .add(predOps(Pred, PredReg))
2257 2701 : .add(condCodeOp())
2258 : .setMIFlags(MIFlags);
2259 : BaseReg = DestReg;
2260 : }
2261 : }
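// A minimal standalone sketch, not LLVM code, of the splitting loop above. An
// ARM data-processing immediate is an 8-bit value rotated right by an even
// amount, so an arbitrary offset is materialised as a chain of ADDri/SUBri
// instructions, each peeling off one encodable chunk. The rotation search
// below mirrors the common case of ARM_AM::getSOImmValRotate.
#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

int main() {
  uint32_t NumBytes = 0x10404; // An offset that needs two instructions.
  while (NumBytes) {
    // The lowest set bit, rounded down to an even position, anchors the
    // 8-bit window (__builtin_ctz is a GCC/Clang builtin).
    unsigned TZ = __builtin_ctz(NumBytes) & ~1u;
    uint32_t ThisVal = NumBytes & rotr32(0xFF, (32 - TZ) & 31);
    std::printf("add r0, r0, #0x%x\n", (unsigned)ThisVal);
    NumBytes &= ~ThisVal; // These bits are handled; keep going on the rest.
  }
  return 0;
}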
2262 :
2263 3117 : bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2264 : MachineFunction &MF, MachineInstr *MI,
2265 : unsigned NumBytes) {
2266             :   // This optimisation potentially adds lots of load and store
2267             :   // micro-operations; it is really only a benefit for code size.
2268 3117 : if (!MF.getFunction().optForMinSize())
2269 : return false;
2270 :
2271 : // If only one register is pushed/popped, LLVM can use an LDR/STR
2272 : // instead. We can't modify those so make sure we're dealing with an
2273 : // instruction we understand.
2274 174 : bool IsPop = isPopOpcode(MI->getOpcode());
2275 : bool IsPush = isPushOpcode(MI->getOpcode());
2276 87 : if (!IsPush && !IsPop)
2277 : return false;
2278 :
2279 83 : bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2280 : MI->getOpcode() == ARM::VLDMDIA_UPD;
2281 72 : bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2282 153 : MI->getOpcode() == ARM::tPOP ||
2283 : MI->getOpcode() == ARM::tPOP_RET;
2284 :
2285 : assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2286 : MI->getOperand(1).getReg() == ARM::SP)) &&
2287 : "trying to fold sp update into non-sp-updating push/pop");
2288 :
2289             :     // The VFP push & pop act on D-registers, so we can only correctly fold in
2290             :     // an adjustment that is a multiple of 8 bytes. Similarly, each rN covers 4
2291             :     // bytes. Don't try if this is violated.
2292 144 : if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2293 : return false;
2294 :
2295 : // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2296 : // pred) so the list starts at 4. Thumb1 starts after the predicate.
2297 81 : int RegListIdx = IsT1PushPop ? 2 : 4;
2298 :
2299 : // Calculate the space we'll need in terms of registers.
2300 : unsigned RegsNeeded;
2301 : const TargetRegisterClass *RegClass;
2302 81 : if (IsVFPPushPop) {
2303 20 : RegsNeeded = NumBytes / 8;
2304 : RegClass = &ARM::DPRRegClass;
2305 : } else {
2306 61 : RegsNeeded = NumBytes / 4;
2307 : RegClass = &ARM::GPRRegClass;
2308 : }
2309 :
2310 : // We're going to have to strip all list operands off before
2311 : // re-adding them since the order matters, so save the existing ones
2312 : // for later.
2313 : SmallVector<MachineOperand, 4> RegList;
2314 :
2315 : // We're also going to need the first register transferred by this
2316 : // instruction, which won't necessarily be the first register in the list.
2317 : unsigned FirstRegEnc = -1;
2318 :
2319 81 : const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2320 332 : for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2321 251 : MachineOperand &MO = MI->getOperand(i);
2322 251 : RegList.push_back(MO);
2323 :
2324 251 : if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2325 : FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2326 : }
2327 :
2328 81 : const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2329 :
2330 : // Now try to find enough space in the reglist to allocate NumBytes.
2331 396 : for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2332 : --CurRegEnc) {
2333 319 : unsigned CurReg = RegClass->getRegister(CurRegEnc);
2334 319 : if (!IsPop) {
2335 : // Pushing any register is completely harmless, mark the register involved
2336 : // as undef since we don't care about its value and must not restore it
2337 : // during stack unwinding.
2338 157 : RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2339 : false, false, true));
2340 157 : --RegsNeeded;
2341 157 : continue;
2342 : }
2343 :
2344 : // However, we can only pop an extra register if it's not live. For
2345 : // registers live within the function we might clobber a return value
2346 : // register; the other way a register can be live here is if it's
2347 : // callee-saved.
2348 269 : if (isCalleeSavedRegister(CurReg, CSRegs) ||
2349 317 : MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2350 : MachineBasicBlock::LQR_Dead) {
2351 : // VFP pops don't allow holes in the register list, so any skip is fatal
2352 : // for our transformation. GPR pops do, so we should just keep looking.
2353 59 : if (IsVFPPushPop)
2354 : return false;
2355 : else
2356 : continue;
2357 : }
2358 :
2359 : // Mark the unimportant registers as <def,dead> in the POP.
2360 103 : RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2361 : true));
2362 103 : --RegsNeeded;
2363 : }
2364 :
2365 77 : if (RegsNeeded > 0)
2366 : return false;
2367 :
2368 : // Finally we know we can profitably perform the optimisation so go
2369 : // ahead: strip all existing registers off and add them back again
2370 : // in the right order.
2371 207 : for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2372 154 : MI->RemoveOperand(i);
2373 :
2374 : // Add the complete list back in.
2375 : MachineInstrBuilder MIB(MF, &*MI);
2376 360 : for (int i = RegList.size() - 1; i >= 0; --i)
2377 307 : MIB.add(RegList[i]);
2378 :
2379 : return true;
2380 : }
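// A minimal standalone sketch, not LLVM code, of the space computation above:
// folding an SP adjustment into a push/pop means transferring extra registers,
// each D-register covering 8 bytes and each GPR 4.
#include <cstdio>

unsigned regsNeededToFold(unsigned NumBytes, bool IsVFP) {
  unsigned Slot = IsVFP ? 8 : 4;
  if (NumBytes % Slot != 0)
    return 0; // A misaligned adjustment cannot be folded at all.
  return NumBytes / Slot;
}

int main() {
  // Folding "sub sp, sp, #8" into "push {r4, r5}" needs two extra GPRs,
  // e.g. "push {r2, r3, r4, r5}" with the new entries marked undef.
  std::printf("%u %u\n", regsNeededToFold(8, false), regsNeededToFold(16, true));
  return 0;
}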
2381 :
2382 8000 : bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2383 : unsigned FrameReg, int &Offset,
2384 : const ARMBaseInstrInfo &TII) {
2385 8000 : unsigned Opcode = MI.getOpcode();
2386 : const MCInstrDesc &Desc = MI.getDesc();
2387 8000 : unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2388 : bool isSub = false;
2389 :
2390 : // Memory operands in inline assembly always use AddrMode2.
2391 8000 : if (Opcode == ARM::INLINEASM)
2392 : AddrMode = ARMII::AddrMode2;
2393 :
2394 8000 : if (Opcode == ARM::ADDri) {
2395 1265 : Offset += MI.getOperand(FrameRegIdx+1).getImm();
2396 1265 : if (Offset == 0) {
2397 : // Turn it into a move.
2398 166 : MI.setDesc(TII.get(ARM::MOVr));
2399 166 : MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2400 166 : MI.RemoveOperand(FrameRegIdx+1);
2401 166 : Offset = 0;
2402 166 : return true;
2403 1099 : } else if (Offset < 0) {
2404 51 : Offset = -Offset;
2405 : isSub = true;
2406 51 : MI.setDesc(TII.get(ARM::SUBri));
2407 : }
2408 :
2409 : // Common case: small offset, fits into instruction.
2410 1099 : if (ARM_AM::getSOImmVal(Offset) != -1) {
2411 : // Replace the FrameIndex with sp / fp
2412 777 : MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2413 1554 : MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2414 777 : Offset = 0;
2415 777 : return true;
2416 : }
2417 :
2418             :     // Otherwise, pull as much of the immediate into this ADDri/SUBri
2419 : // as possible.
2420 322 : unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2421 322 : unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2422 :
2423 : // We will handle these bits from offset, clear them.
2424 322 : Offset &= ~ThisImmVal;
2425 :
2426 : // Get the properly encoded SOImmVal field.
2427 : assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2428 : "Bit extraction didn't work?");
2429 322 : MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2430 : } else {
2431 : unsigned ImmIdx = 0;
2432 : int InstrOffs = 0;
2433 : unsigned NumBits = 0;
2434 : unsigned Scale = 1;
2435 6735 : switch (AddrMode) {
2436 6000 : case ARMII::AddrMode_i12:
2437 6000 : ImmIdx = FrameRegIdx + 1;
2438 6000 : InstrOffs = MI.getOperand(ImmIdx).getImm();
2439 : NumBits = 12;
2440 6000 : break;
2441 0 : case ARMII::AddrMode2:
2442 0 : ImmIdx = FrameRegIdx+2;
2443 0 : InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2444 : if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2445 0 : InstrOffs *= -1;
2446 : NumBits = 12;
2447 : break;
2448 70 : case ARMII::AddrMode3:
2449 70 : ImmIdx = FrameRegIdx+2;
2450 210 : InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2451 : if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2452 0 : InstrOffs *= -1;
2453 : NumBits = 8;
2454 : break;
2455 : case ARMII::AddrMode4:
2456 : case ARMII::AddrMode6:
2457 : // Can't fold any offset even if it's zero.
2458 : return false;
2459 614 : case ARMII::AddrMode5:
2460 614 : ImmIdx = FrameRegIdx+1;
2461 1842 : InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2462 : if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2463 0 : InstrOffs *= -1;
2464 : NumBits = 8;
2465 : Scale = 4;
2466 : break;
2467 22 : case ARMII::AddrMode5FP16:
2468 22 : ImmIdx = FrameRegIdx+1;
2469 66 : InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2470 : if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2471 0 : InstrOffs *= -1;
2472 : NumBits = 8;
2473 : Scale = 2;
2474 : break;
2475 0 : default:
2476 0 : llvm_unreachable("Unsupported addressing mode!");
2477 : }
2478 :
2479 6706 : Offset += InstrOffs * Scale;
2480 : assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2481 6706 : if (Offset < 0) {
2482 215 : Offset = -Offset;
2483 : isSub = true;
2484 : }
2485 :
2486             :     // Attempt to fold the address computation if the opcode has offset bits.
2487 : if (NumBits > 0) {
2488 : // Common case: small offset, fits into instruction.
2489 6706 : MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2490 6706 : int ImmedOffset = Offset / Scale;
2491 6706 : unsigned Mask = (1 << NumBits) - 1;
2492 6706 : if ((unsigned)Offset <= Mask * Scale) {
2493 : // Replace the FrameIndex with sp
2494 6660 : MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2495 : // FIXME: When addrmode2 goes away, this will simplify (like the
2496 : // T2 version), as the LDR.i12 versions don't need the encoding
2497 : // tricks for the offset value.
2498 6660 : if (isSub) {
2499 215 : if (AddrMode == ARMII::AddrMode_i12)
2500 169 : ImmedOffset = -ImmedOffset;
2501 : else
2502 46 : ImmedOffset |= 1 << NumBits;
2503 : }
2504 6660 : ImmOp.ChangeToImmediate(ImmedOffset);
2505 6660 : Offset = 0;
2506 6660 : return true;
2507 : }
2508 :
2509 : // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2510 46 : ImmedOffset = ImmedOffset & Mask;
2511 46 : if (isSub) {
2512 0 : if (AddrMode == ARMII::AddrMode_i12)
2513 0 : ImmedOffset = -ImmedOffset;
2514 : else
2515 0 : ImmedOffset |= 1 << NumBits;
2516 : }
2517 46 : ImmOp.ChangeToImmediate(ImmedOffset);
2518 46 : Offset &= ~(Mask*Scale);
2519 : }
2520 : }
2521 :
2522 368 : Offset = (isSub) ? -Offset : Offset;
2523 368 : return Offset == 0;
2524 : }
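// A minimal standalone sketch, not LLVM code, of the offset-folding arithmetic
// above: each addressing mode offers NumBits of immediate scaled by Scale, so
// a frame offset folds in completely when it fits and is split otherwise.
#include <cstdio>

// Returns the residual offset that could not be folded into the instruction.
int residualFrameOffset(int Offset, unsigned NumBits, unsigned Scale) {
  if (Offset < 0)
    Offset = -Offset; // The sign is carried by the opcode or a mode bit.
  unsigned Mask = (1u << NumBits) - 1;
  if ((unsigned)Offset <= Mask * Scale)
    return 0;                      // The whole offset fits, e.g. imm12 * 1.
  return Offset & ~(Mask * Scale); // Leftover for a separate add/sub.
}

int main() {
  std::printf("i12: %d\n", residualFrameOffset(5000, 12, 1)); // 4096
  std::printf("am5: %d\n", residualFrameOffset(900, 8, 4));   // 0
  return 0;
}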
2525 :
2526 : /// analyzeCompare - For a comparison instruction, return the source registers
2527             : /// in SrcReg and SrcReg2 if it has two register operands, and the value it
2528 : /// compares against in CmpValue. Return true if the comparison instruction
2529 : /// can be analyzed.
2530 37293 : bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
2531 : unsigned &SrcReg2, int &CmpMask,
2532 : int &CmpValue) const {
2533 74586 : switch (MI.getOpcode()) {
2534 : default: break;
2535 5024 : case ARM::CMPri:
2536 : case ARM::t2CMPri:
2537 : case ARM::tCMPi8:
2538 5024 : SrcReg = MI.getOperand(0).getReg();
2539 5024 : SrcReg2 = 0;
2540 5024 : CmpMask = ~0;
2541 5024 : CmpValue = MI.getOperand(1).getImm();
2542 5024 : return true;
2543 891 : case ARM::CMPrr:
2544 : case ARM::t2CMPrr:
2545 891 : SrcReg = MI.getOperand(0).getReg();
2546 891 : SrcReg2 = MI.getOperand(1).getReg();
2547 891 : CmpMask = ~0;
2548 891 : CmpValue = 0;
2549 891 : return true;
2550 267 : case ARM::TSTri:
2551 : case ARM::t2TSTri:
2552 267 : SrcReg = MI.getOperand(0).getReg();
2553 267 : SrcReg2 = 0;
2554 267 : CmpMask = MI.getOperand(1).getImm();
2555 267 : CmpValue = 0;
2556 267 : return true;
2557 : }
2558 :
2559 : return false;
2560 : }
2561 :
2562 : /// isSuitableForMask - Identify a suitable 'and' instruction that
2563 : /// operates on the given source register and applies the same mask
2564 : /// as a 'tst' instruction. Provide a limited look-through for copies.
2565 : /// When successful, MI will hold the found instruction.
2566 : static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2567 : int CmpMask, bool CommonUse) {
2568 194 : switch (MI->getOpcode()) {
2569 0 : case ARM::ANDri:
2570 : case ARM::t2ANDri:
2571 0 : if (CmpMask != MI->getOperand(2).getImm())
2572 : return false;
2573 0 : if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2574 : return true;
2575 : break;
2576 : }
2577 :
2578 : return false;
2579 : }
2580 :
2581 : /// getSwappedCondition - assume the flags are set by MI(a,b), return
2582 : /// the condition code if we modify the instructions such that flags are
2583 : /// set by MI(b,a).
2584 : inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2585 : switch (CC) {
2586 : default: return ARMCC::AL;
2587 : case ARMCC::EQ: return ARMCC::EQ;
2588 : case ARMCC::NE: return ARMCC::NE;
2589 : case ARMCC::HS: return ARMCC::LS;
2590 : case ARMCC::LO: return ARMCC::HI;
2591 : case ARMCC::HI: return ARMCC::LO;
2592 : case ARMCC::LS: return ARMCC::HS;
2593 : case ARMCC::GE: return ARMCC::LE;
2594 : case ARMCC::LT: return ARMCC::GT;
2595 : case ARMCC::GT: return ARMCC::LT;
2596 : case ARMCC::LE: return ARMCC::GE;
2597 : }
2598 : }
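// A minimal standalone sketch, not LLVM code, checking the swap table above
// with plain integers: the swapped condition on CMP(b, a) holds exactly when
// the original condition on CMP(a, b) does.
#include <cstdio>

int main() {
  int a = 3, b = 7;
  // GE on CMP(a, b) is a >= b; the swapped form is LE on CMP(b, a): b <= a.
  std::printf("%d == %d\n", a >= b, b <= a);
  // HS (unsigned >=) likewise swaps to LS (unsigned <=).
  std::printf("%d == %d\n", (unsigned)a >= (unsigned)b,
              (unsigned)b <= (unsigned)a);
  return 0;
}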
2599 :
2600 : /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2601 : /// the condition code if we modify the instructions such that flags are
2602 : /// set by ADD(a,b,X).
2603 : inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2604 : switch (CC) {
2605 : default: return ARMCC::AL;
2606 : case ARMCC::HS: return ARMCC::LO;
2607 : case ARMCC::LO: return ARMCC::HS;
2608 : case ARMCC::VS: return ARMCC::VS;
2609 : case ARMCC::VC: return ARMCC::VC;
2610 : }
2611 : }
2612 :
2613 : /// isRedundantFlagInstr - check whether the first instruction, whose only
2614 : /// purpose is to update flags, can be made redundant.
2615 : /// CMPrr can be made redundant by SUBrr if the operands are the same.
2616 : /// CMPri can be made redundant by SUBri if the operands are the same.
2617 : /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2618 : /// This function can be extended later on.
2619 5833 : inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2620 : unsigned SrcReg, unsigned SrcReg2,
2621 : int ImmValue, const MachineInstr *OI) {
2622 5833 : if ((CmpI->getOpcode() == ARM::CMPrr ||
2623 1376 : CmpI->getOpcode() == ARM::t2CMPrr) &&
2624 1376 : (OI->getOpcode() == ARM::SUBrr ||
2625 5833 : OI->getOpcode() == ARM::t2SUBrr) &&
2626 50 : ((OI->getOperand(1).getReg() == SrcReg &&
2627 50 : OI->getOperand(2).getReg() == SrcReg2) ||
2628 14 : (OI->getOperand(1).getReg() == SrcReg2 &&
2629 14 : OI->getOperand(2).getReg() == SrcReg)))
2630 : return true;
2631 :
2632 4348 : if ((CmpI->getOpcode() == ARM::CMPri ||
2633 3832 : CmpI->getOpcode() == ARM::t2CMPri) &&
2634 3832 : (OI->getOpcode() == ARM::SUBri ||
2635 239 : OI->getOpcode() == ARM::t2SUBri) &&
2636 6040 : OI->getOperand(1).getReg() == SrcReg &&
2637 18 : OI->getOperand(2).getImm() == ImmValue)
2638 : return true;
2639 :
2640 5792 : if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2641 1344 : (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2642 1344 : OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2643 80 : OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2644 5867 : OI->getOperand(0).getReg() == SrcReg &&
2645 41 : OI->getOperand(1).getReg() == SrcReg2)
2646 10 : return true;
2647 : return false;
2648 : }
2649 :
2650 1464 : static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2651 2928 : switch (MI->getOpcode()) {
2652 : default: return false;
2653 66 : case ARM::tLSLri:
2654 : case ARM::tLSRri:
2655 : case ARM::tLSLrr:
2656 : case ARM::tLSRrr:
2657 : case ARM::tSUBrr:
2658 : case ARM::tADDrr:
2659 : case ARM::tADDi3:
2660 : case ARM::tADDi8:
2661 : case ARM::tSUBi3:
2662 : case ARM::tSUBi8:
2663 : case ARM::tMUL:
2664 66 : IsThumb1 = true;
2665 : LLVM_FALLTHROUGH;
2666 : case ARM::RSBrr:
2667 : case ARM::RSBri:
2668 : case ARM::RSCrr:
2669 : case ARM::RSCri:
2670 : case ARM::ADDrr:
2671 : case ARM::ADDri:
2672 : case ARM::ADCrr:
2673 : case ARM::ADCri:
2674 : case ARM::SUBrr:
2675 : case ARM::SUBri:
2676 : case ARM::SBCrr:
2677 : case ARM::SBCri:
2678 : case ARM::t2RSBri:
2679 : case ARM::t2ADDrr:
2680 : case ARM::t2ADDri:
2681 : case ARM::t2ADCrr:
2682 : case ARM::t2ADCri:
2683 : case ARM::t2SUBrr:
2684 : case ARM::t2SUBri:
2685 : case ARM::t2SBCrr:
2686 : case ARM::t2SBCri:
2687 : case ARM::ANDrr:
2688 : case ARM::ANDri:
2689 : case ARM::t2ANDrr:
2690 : case ARM::t2ANDri:
2691 : case ARM::ORRrr:
2692 : case ARM::ORRri:
2693 : case ARM::t2ORRrr:
2694 : case ARM::t2ORRri:
2695 : case ARM::EORrr:
2696 : case ARM::EORri:
2697 : case ARM::t2EORrr:
2698 : case ARM::t2EORri:
2699 : case ARM::t2LSRri:
2700 : case ARM::t2LSRrr:
2701 : case ARM::t2LSLri:
2702 : case ARM::t2LSLrr:
2703 : return true;
2704 : }
2705 : }
2706 :
2707 : /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2708             : /// optimizeCompareInstr - Convert the instruction supplying the argument to the
2709             : /// comparison into one that sets the zero bit in the flags register, and
2710             : /// remove a redundant Compare instruction if an earlier instruction can set the
2711 : /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2712 : /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2713 : /// condition code of instructions which use the flags.
2714 2706 : bool ARMBaseInstrInfo::optimizeCompareInstr(
2715 : MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
2716 : int CmpValue, const MachineRegisterInfo *MRI) const {
2717 : // Get the unique definition of SrcReg.
2718 2706 : MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2719 2706 : if (!MI) return false;
2720 :
2721 : // Masked compares sometimes use the same register as the corresponding 'and'.
2722 2706 : if (CmpMask != ~0) {
2723 0 : if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2724 : MI = nullptr;
2725 67 : for (MachineRegisterInfo::use_instr_iterator
2726 47 : UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2727 181 : UI != UE; ++UI) {
2728 67 : if (UI->getParent() != CmpInstr.getParent())
2729 : continue;
2730 : MachineInstr *PotentialAND = &*UI;
2731 0 : if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2732 0 : isPredicated(*PotentialAND))
2733 50 : continue;
2734 : MI = PotentialAND;
2735 : break;
2736 : }
2737 47 : if (!MI) return false;
2738 : }
2739 : }
2740 :
2741 : // Get ready to iterate backward from CmpInstr.
2742 : MachineBasicBlock::iterator I = CmpInstr, E = MI,
2743 2659 : B = CmpInstr.getParent()->begin();
2744 :
2745 : // Early exit if CmpInstr is at the beginning of the BB.
2746 2659 : if (I == B) return false;
2747 :
2748 : // There are two possible candidates which can be changed to set CPSR:
2749 : // One is MI, the other is a SUB or ADD instruction.
2750 : // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2751 : // ADDr[ri](r1, r2, X).
2752 : // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2753 : MachineInstr *SubAdd = nullptr;
2754 2490 : if (SrcReg2 != 0)
2755 : // MI is not a candidate for CMPrr.
2756 : MI = nullptr;
2757 2026 : else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2758 : // Conservatively refuse to convert an instruction which isn't in the same
2759 : // BB as the comparison.
2760 : // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2761 : // Thus we cannot return here.
2762 1124 : if (CmpInstr.getOpcode() == ARM::CMPri ||
2763 : CmpInstr.getOpcode() == ARM::t2CMPri)
2764 : MI = nullptr;
2765 : else
2766 : return false;
2767 : }
2768 :
2769 960 : bool IsThumb1 = false;
2770 1464 : if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2771 : return false;
2772 :
2773 : // We also want to do this peephole for cases like this: if (a*b == 0),
2774 : // and optimise away the CMP instruction from the generated code sequence:
2775 : // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2776 : // resulting from the select instruction, but these MOVS instructions for
2777 : // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2778 : // However, if we only have MOVS instructions in between the CMP and the
2779 : // other instruction (the MULS in this example), then the CPSR is dead so we
2780 : // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2781 : // reordering and then continue the analysis hoping we can eliminate the
2782 : // CMP. This peephole works on the vregs, so is still in SSA form. As a
2783 : // consequence, the movs won't redefine/kill the MUL operands which would
2784 : // make this reordering illegal.
2785 1317 : if (MI && IsThumb1) {
2786 : --I;
2787 : bool CanReorder = true;
2788 : const bool HasStmts = I != E;
2789 70 : for (; I != E; --I) {
2790 14 : if (I->getOpcode() != ARM::tMOVi8) {
2791 : CanReorder = false;
2792 : break;
2793 : }
2794 : }
2795 66 : if (HasStmts && CanReorder) {
2796 2 : MI = MI->removeFromParent();
2797 : E = CmpInstr;
2798 2 : CmpInstr.getParent()->insert(E, MI);
2799 : }
2800 66 : I = CmpInstr;
2801 : E = MI;
2802 : }
2803 :
2804 : // Check that CPSR isn't set between the comparison instruction and the one we
2805 : // want to change. At the same time, search for SubAdd.
2806 1317 : const TargetRegisterInfo *TRI = &getRegisterInfo();
2807 : do {
2808 : const MachineInstr &Instr = *--I;
2809 :
2810 : // Check whether CmpInstr can be made redundant by the current instruction.
2811 2357 : if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
2812 : SubAdd = &*I;
2813 : break;
2814 : }
2815 :
2816 : // Allow E (which was initially MI) to be SubAdd but do not search before E.
2817 2316 : if (I == E)
2818 : break;
2819 :
2820 2354 : if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2821 : Instr.readsRegister(ARM::CPSR, TRI))
2822 : // This instruction modifies or uses CPSR after the one we want to
2823 : // change. We can't do this transformation.
2824 79 : return false;
2825 :
2826 1111 : } while (I != B);
2827 :
2828 : // Return false if no candidates exist.
2829 1238 : if (!MI && !SubAdd)
2830 : return false;
2831 :
2832 : // The single candidate is called MI.
2833 394 : if (!MI) MI = SubAdd;
2834 :
2835 : // We can't use a predicated instruction - it doesn't always write the flags.
2836 394 : if (isPredicated(*MI))
2837 : return false;
2838 :
2839 : // Scan forward for the use of CPSR
2840             :   // When checking against MI: if it's a condition code that requires
2841 : // checking of the V bit or C bit, then this is not safe to do.
2842 : // It is safe to remove CmpInstr if CPSR is redefined or killed.
2843 : // If we are done with the basic block, we need to check whether CPSR is
2844 : // live-out.
2845 : SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2846 : OperandsToUpdate;
2847 : bool isSafe = false;
2848 390 : I = CmpInstr;
2849 390 : E = CmpInstr.getParent()->end();
2850 2586 : while (!isSafe && ++I != E) {
2851 : const MachineInstr &Instr = *I;
2852 3856 : for (unsigned IO = 0, EO = Instr.getNumOperands();
2853 3856 : !isSafe && IO != EO; ++IO) {
2854 2963 : const MachineOperand &MO = Instr.getOperand(IO);
2855 2963 : if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2856 : isSafe = true;
2857 : break;
2858 : }
2859 2957 : if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2860 : continue;
2861 421 : if (MO.isDef()) {
2862 : isSafe = true;
2863 : break;
2864 : }
2865 : // Condition code is after the operand before CPSR except for VSELs.
2866             :       // The condition code operand comes right before CPSR, except for VSELs.
2867 : bool IsInstrVSel = true;
2868 814 : switch (Instr.getOpcode()) {
2869 403 : default:
2870 : IsInstrVSel = false;
2871 403 : CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2872 403 : break;
2873 : case ARM::VSELEQD:
2874 : case ARM::VSELEQS:
2875 : CC = ARMCC::EQ;
2876 : break;
2877 0 : case ARM::VSELGTD:
2878 : case ARM::VSELGTS:
2879 : CC = ARMCC::GT;
2880 0 : break;
2881 0 : case ARM::VSELGED:
2882 : case ARM::VSELGES:
2883 : CC = ARMCC::GE;
2884 0 : break;
2885 0 : case ARM::VSELVSS:
2886 : case ARM::VSELVSD:
2887 : CC = ARMCC::VS;
2888 0 : break;
2889 : }
2890 :
2891 407 : if (SubAdd) {
2892 : // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2893 : // on CMP needs to be updated to be based on SUB.
2894 : // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
2895 : // needs to be modified.
2896 : // Push the condition code operands to OperandsToUpdate.
2897 : // If it is safe to remove CmpInstr, the condition code of these
2898 : // operands will be modified.
2899 47 : unsigned Opc = SubAdd->getOpcode();
2900 47 : bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
2901 47 : Opc == ARM::SUBri || Opc == ARM::t2SUBri;
2902 41 : if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
2903 13 : SubAdd->getOperand(2).getReg() == SrcReg)) {
2904 : // VSel doesn't support condition code update.
2905 19 : if (IsInstrVSel)
2906 : return false;
2907 : // Ensure we can swap the condition.
2908 19 : ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
2909 19 : if (NewCC == ARMCC::AL)
2910 : return false;
2911 19 : OperandsToUpdate.push_back(
2912 38 : std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2913 : }
2914 : } else {
2915 : // No SubAdd, so this is x = <op> y, z; cmp x, 0.
2916 : switch (CC) {
2917 : case ARMCC::EQ: // Z
2918 : case ARMCC::NE: // Z
2919 : case ARMCC::MI: // N
2920 : case ARMCC::PL: // N
2921 : case ARMCC::AL: // none
2922 : // CPSR can be used multiple times, we should continue.
2923 : break;
2924 : case ARMCC::HS: // C
2925 : case ARMCC::LO: // C
2926 : case ARMCC::VS: // V
2927 : case ARMCC::VC: // V
2928 : case ARMCC::HI: // C Z
2929 : case ARMCC::LS: // C Z
2930 : case ARMCC::GE: // N V
2931 : case ARMCC::LT: // N V
2932 : case ARMCC::GT: // Z N V
2933 : case ARMCC::LE: // Z N V
2934 : // The instruction uses the V bit or C bit which is not safe.
2935 : return false;
2936 : }
2937 : }
2938 : }
2939 : }
2940 :
2941             :   // If CPSR is neither killed nor re-defined, we should check whether it is
2942 : // live-out. If it is live-out, do not optimize.
2943 376 : if (!isSafe) {
2944 356 : MachineBasicBlock *MBB = CmpInstr.getParent();
2945 : for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2946 911 : SE = MBB->succ_end(); SI != SE; ++SI)
2947 558 : if ((*SI)->isLiveIn(ARM::CPSR))
2948 : return false;
2949 : }
2950 :
2951 : // Toggle the optional operand to CPSR (if it exists; in Thumb1 we always
2952 : // set CPSR, so this is represented as an explicit output).
2953 373 : if (!IsThumb1) {
2954 616 : MI->getOperand(5).setReg(ARM::CPSR);
2955 616 : MI->getOperand(5).setIsDef(true);
2956 : }
2957 : assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
2958 373 : CmpInstr.eraseFromParent();
2959 :
2960 : // Modify the condition code of operands in OperandsToUpdate.
2961 : // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2962 : // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2963 389 : for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2964 32 : OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2965 :
2966 : return true;
2967 : }
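// Illustrative sketch (not part of the source) of the rewrite performed
// above, assuming ARM mode and hypothetical registers:
//   sub   r0, r1, r2    @ candidate MI
//   cmp   r2, r1        @ redundant compare, operands swapped
//   movgt r3, #1        @ GT based on "r2 - r1"
// becomes, after SUB is made flag-setting and the CMP is erased:
//   subs  r0, r1, r2
//   movlt r3, #1        @ getSwappedCondition(GT) == LT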
2968 :
2969 40834 : bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
2970 : // Do not sink MI if it might be used to optimize a redundant compare.
2971 : // We heuristically only look at the instruction immediately following MI to
2972 : // avoid potentially searching the entire basic block.
2973 40834 : if (isPredicated(MI))
2974 : return true;
2975 : MachineBasicBlock::const_iterator Next = &MI;
2976 : ++Next;
2977 : unsigned SrcReg, SrcReg2;
2978 : int CmpMask, CmpValue;
2979 71677 : if (Next != MI.getParent()->end() &&
2980 37303 : analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
2981 3476 : isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
2982 10 : return false;
2983 : return true;
2984 : }
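// Hypothetical example: if "%1 = SUBrr %2, %3" is immediately followed by
// "CMPrr %2, %3", sinking the SUB would keep optimizeCompareInstr from
// folding the compare into a flag-setting SUBS, so we report false here.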
2985 :
2986 5953 : bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2987 : unsigned Reg,
2988 : MachineRegisterInfo *MRI) const {
2989 : // Fold large immediates into add, sub, or, xor.
2990 5953 : unsigned DefOpc = DefMI.getOpcode();
2991 5953 : if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2992 : return false;
2993 2672 : if (!DefMI.getOperand(1).isImm())
2994 : // Could be t2MOVi32imm @xx
2995 : return false;
2996 :
2997 564 : if (!MRI->hasOneNonDBGUse(Reg))
2998 : return false;
2999 :
3000 345 : const MCInstrDesc &DefMCID = DefMI.getDesc();
3001 690 : if (DefMCID.hasOptionalDef()) {
3002 0 : unsigned NumOps = DefMCID.getNumOperands();
3003 0 : const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3004 0 : if (MO.getReg() == ARM::CPSR && !MO.isDead())
3005 : // If DefMI defines CPSR and it is not dead, it's obviously not safe
3006 : // to delete DefMI.
3007 : return false;
3008 : }
3009 :
3010 345 : const MCInstrDesc &UseMCID = UseMI.getDesc();
3011 690 : if (UseMCID.hasOptionalDef()) {
3012 156 : unsigned NumOps = UseMCID.getNumOperands();
3013 312 : if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3014 : // If the instruction sets the flag, do not attempt this optimization
3015 : // since it may change the semantics of the code.
3016 : return false;
3017 : }
3018 :
3019 : unsigned UseOpc = UseMI.getOpcode();
3020 : unsigned NewUseOpc = 0;
3021 340 : uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3022 : uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3023 : bool Commute = false;
3024 340 : switch (UseOpc) {
3025 : default: return false;
3026 32 : case ARM::SUBrr:
3027 : case ARM::ADDrr:
3028 : case ARM::ORRrr:
3029 : case ARM::EORrr:
3030 : case ARM::t2SUBrr:
3031 : case ARM::t2ADDrr:
3032 : case ARM::t2ORRrr:
3033 : case ARM::t2EORrr: {
3034 32 : Commute = UseMI.getOperand(2).getReg() != Reg;
3035 : switch (UseOpc) {
3036 : default: break;
3037 9 : case ARM::ADDrr:
3038 : case ARM::SUBrr:
3039 9 : if (UseOpc == ARM::SUBrr && Commute)
3040 : return false;
3041 :
3042 : // ADD/SUB are special because they're essentially the same operation, so
3043 : // we can handle a larger range of immediates.
3044 9 : if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3045 3 : NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3046 6 : else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3047 : ImmVal = -ImmVal;
3048 2 : NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3049 : } else
3050 : return false;
3051 : SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3052 : SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3053 5 : break;
3054 2 : case ARM::ORRrr:
3055 : case ARM::EORrr:
3056 2 : if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3057 : return false;
3058 : SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3059 : SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3060 : switch (UseOpc) {
3061 : default: break;
3062 1 : case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3063 0 : case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3064 : }
3065 : break;
3066 17 : case ARM::t2ADDrr:
3067 : case ARM::t2SUBrr:
3068 17 : if (UseOpc == ARM::t2SUBrr && Commute)
3069 : return false;
3070 :
3071 : // ADD/SUB are special because they're essentially the same operation, so
3072 : // we can handle a larger range of immediates.
3073 17 : if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3074 9 : NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
3075 8 : else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3076 : ImmVal = -ImmVal;
3077 2 : NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
3078 : } else
3079 : return false;
3080 11 : SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3081 : SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3082 11 : break;
3083 4 : case ARM::t2ORRrr:
3084 : case ARM::t2EORrr:
3085 4 : if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3086 : return false;
3087 0 : SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3088 : SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3089 : switch (UseOpc) {
3090 : default: break;
3091 0 : case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3092 0 : case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3093 : }
3094 : break;
3095 : }
3096 : }
3097 : }
3098 :
3099 17 : unsigned OpIdx = Commute ? 2 : 1;
3100 17 : unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
3101 : bool isKill = UseMI.getOperand(OpIdx).isKill();
3102 17 : unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
3103 34 : BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3104 34 : NewReg)
3105 17 : .addReg(Reg1, getKillRegState(isKill))
3106 17 : .addImm(SOImmValV1)
3107 17 : .add(predOps(ARMCC::AL))
3108 17 : .add(condCodeOp());
3109 17 : UseMI.setDesc(get(NewUseOpc));
3110 34 : UseMI.getOperand(1).setReg(NewReg);
3111 17 : UseMI.getOperand(1).setIsKill();
3112 34 : UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3113 17 : DefMI.eraseFromParent();
3114 17 : return true;
3115 : }
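// Worked example of the fold above (register numbers and the exact split
// chosen by getSOImmTwoPartFirst/Second are assumptions):
//   %1 = MOVi32imm 0xF00F0   ; not a single so_imm
//   %2 = ADDrr %0, %1
// 0xF00F0 splits into two rotated-8-bit immediates such as 0xF0000 and
// 0xF0, so the pair can be rewritten as:
//   %3 = ADDri %0, 0xF0000
//   %2 = ADDri %3, 0xF0
// and the MOVi32imm is erased.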
3116 :
3117 0 : static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3118 : const MachineInstr &MI) {
3119 0 : switch (MI.getOpcode()) {
3120 0 : default: {
3121 : const MCInstrDesc &Desc = MI.getDesc();
3122 0 : int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3123 : assert(UOps >= 0 && "bad # UOps");
3124 0 : return UOps;
3125 : }
3126 :
3127 0 : case ARM::LDRrs:
3128 : case ARM::LDRBrs:
3129 : case ARM::STRrs:
3130 : case ARM::STRBrs: {
3131 0 : unsigned ShOpVal = MI.getOperand(3).getImm();
3132 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3133 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3134 0 : if (!isSub &&
3135 0 : (ShImm == 0 ||
3136 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3137 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3138 0 : return 1;
3139 : return 2;
3140 : }
3141 :
3142 0 : case ARM::LDRH:
3143 : case ARM::STRH: {
3144 0 : if (!MI.getOperand(2).getReg())
3145 : return 1;
3146 :
3147 0 : unsigned ShOpVal = MI.getOperand(3).getImm();
3148 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3149 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3150 0 : if (!isSub &&
3151 0 : (ShImm == 0 ||
3152 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3153 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3154 0 : return 1;
3155 : return 2;
3156 : }
3157 :
3158 0 : case ARM::LDRSB:
3159 : case ARM::LDRSH:
3160 0 : return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3161 :
3162 0 : case ARM::LDRSB_POST:
3163 : case ARM::LDRSH_POST: {
3164 0 : unsigned Rt = MI.getOperand(0).getReg();
3165 0 : unsigned Rm = MI.getOperand(3).getReg();
3166 0 : return (Rt == Rm) ? 4 : 3;
3167 : }
3168 :
3169 0 : case ARM::LDR_PRE_REG:
3170 : case ARM::LDRB_PRE_REG: {
3171 0 : unsigned Rt = MI.getOperand(0).getReg();
3172 0 : unsigned Rm = MI.getOperand(3).getReg();
3173 0 : if (Rt == Rm)
3174 : return 3;
3175 0 : unsigned ShOpVal = MI.getOperand(4).getImm();
3176 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3177 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3178 0 : if (!isSub &&
3179 0 : (ShImm == 0 ||
3180 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3181 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3182 0 : return 2;
3183 : return 3;
3184 : }
3185 :
3186 0 : case ARM::STR_PRE_REG:
3187 : case ARM::STRB_PRE_REG: {
3188 0 : unsigned ShOpVal = MI.getOperand(4).getImm();
3189 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3190 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3191 0 : if (!isSub &&
3192 0 : (ShImm == 0 ||
3193 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3194 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3195 0 : return 2;
3196 : return 3;
3197 : }
3198 :
3199 0 : case ARM::LDRH_PRE:
3200 : case ARM::STRH_PRE: {
3201 0 : unsigned Rt = MI.getOperand(0).getReg();
3202 0 : unsigned Rm = MI.getOperand(3).getReg();
3203 0 : if (!Rm)
3204 : return 2;
3205 0 : if (Rt == Rm)
3206 : return 3;
3207 0 : return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3208 : }
3209 :
3210 0 : case ARM::LDR_POST_REG:
3211 : case ARM::LDRB_POST_REG:
3212 : case ARM::LDRH_POST: {
3213 0 : unsigned Rt = MI.getOperand(0).getReg();
3214 0 : unsigned Rm = MI.getOperand(3).getReg();
3215 0 : return (Rt == Rm) ? 3 : 2;
3216 : }
3217 :
3218 : case ARM::LDR_PRE_IMM:
3219 : case ARM::LDRB_PRE_IMM:
3220 : case ARM::LDR_POST_IMM:
3221 : case ARM::LDRB_POST_IMM:
3222 : case ARM::STRB_POST_IMM:
3223 : case ARM::STRB_POST_REG:
3224 : case ARM::STRB_PRE_IMM:
3225 : case ARM::STRH_POST:
3226 : case ARM::STR_POST_IMM:
3227 : case ARM::STR_POST_REG:
3228 : case ARM::STR_PRE_IMM:
3229 : return 2;
3230 :
3231 0 : case ARM::LDRSB_PRE:
3232 : case ARM::LDRSH_PRE: {
3233 0 : unsigned Rm = MI.getOperand(3).getReg();
3234 0 : if (Rm == 0)
3235 : return 3;
3236 0 : unsigned Rt = MI.getOperand(0).getReg();
3237 0 : if (Rt == Rm)
3238 : return 4;
3239 0 : unsigned ShOpVal = MI.getOperand(4).getImm();
3240 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3241 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3242 0 : if (!isSub &&
3243 0 : (ShImm == 0 ||
3244 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3245 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3246 0 : return 3;
3247 : return 4;
3248 : }
3249 :
3250 0 : case ARM::LDRD: {
3251 0 : unsigned Rt = MI.getOperand(0).getReg();
3252 0 : unsigned Rn = MI.getOperand(2).getReg();
3253 0 : unsigned Rm = MI.getOperand(3).getReg();
3254 0 : if (Rm)
3255 0 : return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3256 : : 3;
3257 0 : return (Rt == Rn) ? 3 : 2;
3258 : }
3259 :
3260 0 : case ARM::STRD: {
3261 0 : unsigned Rm = MI.getOperand(3).getReg();
3262 0 : if (Rm)
3263 0 : return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3264 : : 3;
3265 : return 2;
3266 : }
3267 :
3268 0 : case ARM::LDRD_POST:
3269 : case ARM::t2LDRD_POST:
3270 0 : return 3;
3271 :
3272 0 : case ARM::STRD_POST:
3273 : case ARM::t2STRD_POST:
3274 0 : return 4;
3275 :
3276 0 : case ARM::LDRD_PRE: {
3277 0 : unsigned Rt = MI.getOperand(0).getReg();
3278 0 : unsigned Rn = MI.getOperand(3).getReg();
3279 0 : unsigned Rm = MI.getOperand(4).getReg();
3280 0 : if (Rm)
3281 0 : return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3282 : : 4;
3283 0 : return (Rt == Rn) ? 4 : 3;
3284 : }
3285 :
3286 0 : case ARM::t2LDRD_PRE: {
3287 0 : unsigned Rt = MI.getOperand(0).getReg();
3288 0 : unsigned Rn = MI.getOperand(3).getReg();
3289 0 : return (Rt == Rn) ? 4 : 3;
3290 : }
3291 :
3292 0 : case ARM::STRD_PRE: {
3293 0 : unsigned Rm = MI.getOperand(4).getReg();
3294 0 : if (Rm)
3295 0 : return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3296 : : 4;
3297 : return 3;
3298 : }
3299 :
3300 0 : case ARM::t2STRD_PRE:
3301 0 : return 3;
3302 :
3303 : case ARM::t2LDR_POST:
3304 : case ARM::t2LDRB_POST:
3305 : case ARM::t2LDRB_PRE:
3306 : case ARM::t2LDRSBi12:
3307 : case ARM::t2LDRSBi8:
3308 : case ARM::t2LDRSBpci:
3309 : case ARM::t2LDRSBs:
3310 : case ARM::t2LDRH_POST:
3311 : case ARM::t2LDRH_PRE:
3312 : case ARM::t2LDRSBT:
3313 : case ARM::t2LDRSB_POST:
3314 : case ARM::t2LDRSB_PRE:
3315 : case ARM::t2LDRSH_POST:
3316 : case ARM::t2LDRSH_PRE:
3317 : case ARM::t2LDRSHi12:
3318 : case ARM::t2LDRSHi8:
3319 : case ARM::t2LDRSHpci:
3320 : case ARM::t2LDRSHs:
3321 : return 2;
3322 :
3323 0 : case ARM::t2LDRDi8: {
3324 0 : unsigned Rt = MI.getOperand(0).getReg();
3325 0 : unsigned Rn = MI.getOperand(2).getReg();
3326 0 : return (Rt == Rn) ? 3 : 2;
3327 : }
3328 :
3329 : case ARM::t2STRB_POST:
3330 : case ARM::t2STRB_PRE:
3331 : case ARM::t2STRBs:
3332 : case ARM::t2STRDi8:
3333 : case ARM::t2STRH_POST:
3334 : case ARM::t2STRH_PRE:
3335 : case ARM::t2STRHs:
3336 : case ARM::t2STR_POST:
3337 : case ARM::t2STR_PRE:
3338 : case ARM::t2STRs:
3339 : return 2;
3340 : }
3341 : }
3342 :
3343 : // Return the number of 32-bit words loaded by LDM or stored by STM. If this
3344 : // can't be easily determined, return 0 (missing MachineMemOperand).
3345 : //
3346 : // FIXME: The current MachineInstr design does not support relying on machine
3347 : // mem operands to determine the width of a memory access. Instead, we expect
3348 : // the target to provide this information based on the instruction opcode and
3349 : // operands. However, using MachineMemOperand is the best solution now for
3350 : // two reasons:
3351 : //
3352 : // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3353 : // operands. This is much more dangerous than using the MachineMemOperand
3354 : // sizes because CodeGen passes can insert/remove optional machine operands. In
3355 : // fact, it's totally incorrect for preRA passes and appears to be wrong for
3356 : // postRA passes as well.
3357 : //
3358 : // 2) getNumLDMAddresses is only used by the scheduling machine model and any
3359 : // machine model that calls this should handle the unknown (zero size) case.
3360 : //
3361 : // Long term, we should require a target hook that verifies MachineMemOperand
3362 : // sizes during MC lowering. That target hook should be local to MC lowering
3363 : // because we can't ensure that it is aware of other MI forms. Doing this will
3364 : // ensure that MachineMemOperands are correctly propagated through all passes.
3365 1351 : unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3366 : unsigned Size = 0;
3367 31 : for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3368 : E = MI.memoperands_end();
3369 1382 : I != E; ++I) {
3370 31 : Size += (*I)->getSize();
3371 : }
3372 1351 : return Size / 4;
3373 : }
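// Example: an LDM carrying two 8-byte MachineMemOperands sums to Size == 16,
// i.e. four 32-bit words; with no memoperands attached this conservatively
// reports 0 (unknown).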
3374 :
3375 0 : static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3376 : unsigned NumRegs) {
3377 0 : unsigned UOps = 1 + NumRegs; // 1 for address computation.
3378 0 : switch (Opc) {
3379 : default:
3380 : break;
3381 0 : case ARM::VLDMDIA_UPD:
3382 : case ARM::VLDMDDB_UPD:
3383 : case ARM::VLDMSIA_UPD:
3384 : case ARM::VLDMSDB_UPD:
3385 : case ARM::VSTMDIA_UPD:
3386 : case ARM::VSTMDDB_UPD:
3387 : case ARM::VSTMSIA_UPD:
3388 : case ARM::VSTMSDB_UPD:
3389 : case ARM::LDMIA_UPD:
3390 : case ARM::LDMDA_UPD:
3391 : case ARM::LDMDB_UPD:
3392 : case ARM::LDMIB_UPD:
3393 : case ARM::STMIA_UPD:
3394 : case ARM::STMDA_UPD:
3395 : case ARM::STMDB_UPD:
3396 : case ARM::STMIB_UPD:
3397 : case ARM::tLDMIA_UPD:
3398 : case ARM::tSTMIA_UPD:
3399 : case ARM::t2LDMIA_UPD:
3400 : case ARM::t2LDMDB_UPD:
3401 : case ARM::t2STMIA_UPD:
3402 : case ARM::t2STMDB_UPD:
3403 0 : ++UOps; // One for base register writeback.
3404 0 : break;
3405 0 : case ARM::LDMIA_RET:
3406 : case ARM::tPOP_RET:
3407 : case ARM::t2LDMIA_RET:
3408 0 : UOps += 2; // One for base reg wb, one for write to pc.
3409 0 : break;
3410 : }
3411 0 : return UOps;
3412 : }
3413 :
3414 4457 : unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3415 : const MachineInstr &MI) const {
3416 4457 : if (!ItinData || ItinData->isEmpty())
3417 : return 1;
3418 :
3419 4457 : const MCInstrDesc &Desc = MI.getDesc();
3420 4457 : unsigned Class = Desc.getSchedClass();
3421 : int ItinUOps = ItinData->getNumMicroOps(Class);
3422 4457 : if (ItinUOps >= 0) {
3423 0 : if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3424 0 : return getNumMicroOpsSwiftLdSt(ItinData, MI);
3425 :
3426 0 : return ItinUOps;
3427 : }
3428 :
3429 : unsigned Opc = MI.getOpcode();
3430 4457 : switch (Opc) {
3431 0 : default:
3432 0 : llvm_unreachable("Unexpected multi-uops instruction!");
3433 : case ARM::VLDMQIA:
3434 : case ARM::VSTMQIA:
3435 : return 2;
3436 :
3437 : // The number of uOps for load / store multiple is determined by the number
3438 : // of registers.
3439 : //
3440 : // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3441 : // same cycle. The scheduling for the first load / store must be done
3442 : // separately by assuming the address is not 64-bit aligned.
3443 : //
3444 : // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3445 : // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3446 : // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3447 414 : case ARM::VLDMDIA:
3448 : case ARM::VLDMDIA_UPD:
3449 : case ARM::VLDMDDB_UPD:
3450 : case ARM::VLDMSIA:
3451 : case ARM::VLDMSIA_UPD:
3452 : case ARM::VLDMSDB_UPD:
3453 : case ARM::VSTMDIA:
3454 : case ARM::VSTMDIA_UPD:
3455 : case ARM::VSTMDDB_UPD:
3456 : case ARM::VSTMSIA:
3457 : case ARM::VSTMSIA_UPD:
3458 : case ARM::VSTMSDB_UPD: {
3459 414 : unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3460 414 : return (NumRegs / 2) + (NumRegs % 2) + 1;
3461 : }
3462 :
3463 4043 : case ARM::LDMIA_RET:
3464 : case ARM::LDMIA:
3465 : case ARM::LDMDA:
3466 : case ARM::LDMDB:
3467 : case ARM::LDMIB:
3468 : case ARM::LDMIA_UPD:
3469 : case ARM::LDMDA_UPD:
3470 : case ARM::LDMDB_UPD:
3471 : case ARM::LDMIB_UPD:
3472 : case ARM::STMIA:
3473 : case ARM::STMDA:
3474 : case ARM::STMDB:
3475 : case ARM::STMIB:
3476 : case ARM::STMIA_UPD:
3477 : case ARM::STMDA_UPD:
3478 : case ARM::STMDB_UPD:
3479 : case ARM::STMIB_UPD:
3480 : case ARM::tLDMIA:
3481 : case ARM::tLDMIA_UPD:
3482 : case ARM::tSTMIA_UPD:
3483 : case ARM::tPOP_RET:
3484 : case ARM::tPOP:
3485 : case ARM::tPUSH:
3486 : case ARM::t2LDMIA_RET:
3487 : case ARM::t2LDMIA:
3488 : case ARM::t2LDMDB:
3489 : case ARM::t2LDMIA_UPD:
3490 : case ARM::t2LDMDB_UPD:
3491 : case ARM::t2STMIA:
3492 : case ARM::t2STMDB:
3493 : case ARM::t2STMIA_UPD:
3494 : case ARM::t2STMDB_UPD: {
3495 4043 : unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3496 4043 : switch (Subtarget.getLdStMultipleTiming()) {
3497 0 : case ARMSubtarget::SingleIssuePlusExtras:
3498 0 : return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3499 : case ARMSubtarget::SingleIssue:
3500 : // Assume the worst.
3501 : return NumRegs;
3502 277 : case ARMSubtarget::DoubleIssue: {
3503 277 : if (NumRegs < 4)
3504 : return 2;
3505 : // 4 registers would be issued: 2, 2.
3506 : // 5 registers would be issued: 2, 2, 1.
3507 97 : unsigned UOps = (NumRegs / 2);
3508 97 : if (NumRegs % 2)
3509 33 : ++UOps;
3510 : return UOps;
3511 : }
3512 64 : case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3513 64 : unsigned UOps = (NumRegs / 2);
3514 : // If there is an odd number of registers or the address is not 64-bit
3515 : // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3516 114 : if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3517 0 : (*MI.memoperands_begin())->getAlignment() < 8)
3518 64 : ++UOps;
3519 : return UOps;
3520 : }
3521 0 : }
3522 : }
3523 : }
3524 0 : llvm_unreachable("Didn't find the number of microops");
3525 : }
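// Worked example for the cases above: an LDM of five registers issues as
// 2 + 2 + 1 on a DoubleIssue core, i.e. (5 / 2) + (5 % 2) = 3 uops; on a
// DoubleIssueCheckUnalignedAccess core the odd register count costs an
// extra AGU cycle, also giving (5 / 2) + 1 = 3 uops.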
3526 :
3527 : int
3528 204 : ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3529 : const MCInstrDesc &DefMCID,
3530 : unsigned DefClass,
3531 : unsigned DefIdx, unsigned DefAlign) const {
3532 204 : int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3533 204 : if (RegNo <= 0)
3534 : // Def is the address writeback.
3535 : return ItinData->getOperandCycle(DefClass, DefIdx);
3536 :
3537 : int DefCycle;
3538 204 : if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3539 : // (regno / 2) + (regno % 2) + 1
3540 16 : DefCycle = RegNo / 2 + 1;
3541 16 : if (RegNo % 2)
3542 7 : ++DefCycle;
3543 186 : } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3544 : DefCycle = RegNo;
3545 : bool isSLoad = false;
3546 :
3547 4 : switch (DefMCID.getOpcode()) {
3548 : default: break;
3549 : case ARM::VLDMSIA:
3550 : case ARM::VLDMSIA_UPD:
3551 : case ARM::VLDMSDB_UPD:
3552 : isSLoad = true;
3553 : break;
3554 : }
3555 :
3556 : // If there is an odd number of 'S' registers or the address is not
3557 : // 64-bit aligned, then it takes an extra cycle.
3558 2 : if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3559 0 : ++DefCycle;
3560 : } else {
3561 : // Assume the worst.
3562 186 : DefCycle = RegNo + 2;
3563 : }
3564 :
3565 : return DefCycle;
3566 : }
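// Example: on Cortex-A8, the third register loaded by a VLDMDIA (RegNo == 3)
// gets DefCycle = 3 / 2 + 1 = 2, plus one more for the odd register number,
// i.e. 3 cycles.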
3567 :
3568 0 : bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
3569 0 : unsigned BaseReg = MI.getOperand(0).getReg();
3570 0 : for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
3571 : const auto &Op = MI.getOperand(i);
3572 0 : if (Op.isReg() && Op.getReg() == BaseReg)
3573 : return true;
3574 : }
3575 : return false;
3576 : }
3577 : unsigned
3578 7 : ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
3579 : // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops
3580 : // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops)
3581 14 : return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
3582 : }
3583 :
3584 : int
3585 394 : ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3586 : const MCInstrDesc &DefMCID,
3587 : unsigned DefClass,
3588 : unsigned DefIdx, unsigned DefAlign) const {
3589 394 : int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3590 394 : if (RegNo <= 0)
3591 : // Def is the address writeback.
3592 : return ItinData->getOperandCycle(DefClass, DefIdx);
3593 :
3594 : int DefCycle;
3595 394 : if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3596 : // 4 registers would be issued: 1, 2, 1.
3597 : // 5 registers would be issued: 1, 2, 2.
3598 13 : DefCycle = RegNo / 2;
3599 13 : if (DefCycle < 1)
3600 : DefCycle = 1;
3601 : // Result latency is issue cycle + 2: E2.
3602 13 : DefCycle += 2;
3603 375 : } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3604 6 : DefCycle = (RegNo / 2);
3605 : // If there is an odd number of registers or the address is not 64-bit
3606 : // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3607 6 : if ((RegNo % 2) || DefAlign < 8)
3608 6 : ++DefCycle;
3609 : // Result latency is AGU cycles + 2.
3610 6 : DefCycle += 2;
3611 : } else {
3612 : // Assume the worst.
3613 375 : DefCycle = RegNo + 2;
3614 : }
3615 :
3616 : return DefCycle;
3617 : }
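// Example: on Cortex-A8, the fifth register of an LDM (RegNo == 5) gets
// DefCycle = max(5 / 2, 1) + 2 = 4, matching the "1, 2, 2" issue pattern
// described above.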
3618 :
3619 : int
3620 12 : ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3621 : const MCInstrDesc &UseMCID,
3622 : unsigned UseClass,
3623 : unsigned UseIdx, unsigned UseAlign) const {
3624 12 : int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3625 12 : if (RegNo <= 0)
3626 : return ItinData->getOperandCycle(UseClass, UseIdx);
3627 :
3628 : int UseCycle;
3629 12 : if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3630 : // (regno / 2) + (regno % 2) + 1
3631 6 : UseCycle = RegNo / 2 + 1;
3632 6 : if (RegNo % 2)
3633 1 : ++UseCycle;
3634 3 : } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3635 : UseCycle = RegNo;
3636 : bool isSStore = false;
3637 :
3638 6 : switch (UseMCID.getOpcode()) {
3639 : default: break;
3640 : case ARM::VSTMSIA:
3641 : case ARM::VSTMSIA_UPD:
3642 : case ARM::VSTMSDB_UPD:
3643 : isSStore = true;
3644 : break;
3645 : }
3646 :
3647 : // If there is an odd number of 'S' registers or the address is not
3648 : // 64-bit aligned, then it takes an extra cycle.
3649 3 : if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3650 0 : ++UseCycle;
3651 : } else {
3652 : // Assume the worst.
3653 3 : UseCycle = RegNo + 2;
3654 : }
3655 :
3656 : return UseCycle;
3657 : }
3658 :
3659 : int
3660 286 : ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3661 : const MCInstrDesc &UseMCID,
3662 : unsigned UseClass,
3663 : unsigned UseIdx, unsigned UseAlign) const {
3664 286 : int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3665 286 : if (RegNo <= 0)
3666 : return ItinData->getOperandCycle(UseClass, UseIdx);
3667 :
3668 : int UseCycle;
3669 286 : if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3670 15 : UseCycle = RegNo / 2;
3671 15 : if (UseCycle < 2)
3672 : UseCycle = 2;
3673 : // Read in E3.
3674 15 : UseCycle += 2;
3675 271 : } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3676 0 : UseCycle = (RegNo / 2);
3677 : // If there is an odd number of registers or the address is not 64-bit
3678 : // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
3679 0 : if ((RegNo % 2) || UseAlign < 8)
3680 0 : ++UseCycle;
3681 : } else {
3682 : // Assume the worst.
3683 : UseCycle = 1;
3684 : }
3685 : return UseCycle;
3686 : }
3687 :
3688 : int
3689 68207 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3690 : const MCInstrDesc &DefMCID,
3691 : unsigned DefIdx, unsigned DefAlign,
3692 : const MCInstrDesc &UseMCID,
3693 : unsigned UseIdx, unsigned UseAlign) const {
3694 68207 : unsigned DefClass = DefMCID.getSchedClass();
3695 68207 : unsigned UseClass = UseMCID.getSchedClass();
3696 :
3697 136414 : if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3698 65846 : return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3699 :
3700 : // This may be a def / use of a variable_ops instruction; the operand
3701 : // latency might be determinable dynamically. Let the target try to
3702 : // figure it out.
3703 : int DefCycle = -1;
3704 : bool LdmBypass = false;
3705 4722 : switch (DefMCID.getOpcode()) {
3706 : default:
3707 : DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3708 : break;
3709 :
3710 204 : case ARM::VLDMDIA:
3711 : case ARM::VLDMDIA_UPD:
3712 : case ARM::VLDMDDB_UPD:
3713 : case ARM::VLDMSIA:
3714 : case ARM::VLDMSIA_UPD:
3715 : case ARM::VLDMSDB_UPD:
3716 204 : DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3717 204 : break;
3718 :
3719 394 : case ARM::LDMIA_RET:
3720 : case ARM::LDMIA:
3721 : case ARM::LDMDA:
3722 : case ARM::LDMDB:
3723 : case ARM::LDMIB:
3724 : case ARM::LDMIA_UPD:
3725 : case ARM::LDMDA_UPD:
3726 : case ARM::LDMDB_UPD:
3727 : case ARM::LDMIB_UPD:
3728 : case ARM::tLDMIA:
3729 : case ARM::tLDMIA_UPD:
3730 : case ARM::tPUSH:
3731 : case ARM::t2LDMIA_RET:
3732 : case ARM::t2LDMIA:
3733 : case ARM::t2LDMDB:
3734 : case ARM::t2LDMIA_UPD:
3735 : case ARM::t2LDMDB_UPD:
3736 : LdmBypass = true;
3737 394 : DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3738 394 : break;
3739 : }
3740 :
3741 2361 : if (DefCycle == -1)
3742 : // We can't seem to determine the result latency of the def, assume it's 2.
3743 : DefCycle = 2;
3744 :
3745 : int UseCycle = -1;
3746 4722 : switch (UseMCID.getOpcode()) {
3747 : default:
3748 : UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3749 499 : break;
3750 :
3751 12 : case ARM::VSTMDIA:
3752 : case ARM::VSTMDIA_UPD:
3753 : case ARM::VSTMDDB_UPD:
3754 : case ARM::VSTMSIA:
3755 : case ARM::VSTMSIA_UPD:
3756 : case ARM::VSTMSDB_UPD:
3757 12 : UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3758 12 : break;
3759 :
3760 286 : case ARM::STMIA:
3761 : case ARM::STMDA:
3762 : case ARM::STMDB:
3763 : case ARM::STMIB:
3764 : case ARM::STMIA_UPD:
3765 : case ARM::STMDA_UPD:
3766 : case ARM::STMDB_UPD:
3767 : case ARM::STMIB_UPD:
3768 : case ARM::tSTMIA_UPD:
3769 : case ARM::tPOP_RET:
3770 : case ARM::tPOP:
3771 : case ARM::t2STMIA:
3772 : case ARM::t2STMDB:
3773 : case ARM::t2STMIA_UPD:
3774 : case ARM::t2STMDB_UPD:
3775 286 : UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3776 286 : break;
3777 : }
3778 :
3779 797 : if (UseCycle == -1)
3780 : // Assume it's read in the first stage.
3781 : UseCycle = 1;
3782 :
3783 2361 : UseCycle = DefCycle - UseCycle + 1;
3784 2361 : if (UseCycle > 0) {
3785 2280 : if (LdmBypass) {
3786 : // It's a variable_ops instruction, so we can't use DefIdx here. Just use
3787 : // the first def operand.
3788 788 : if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3789 : UseClass, UseIdx))
3790 : --UseCycle;
3791 1886 : } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3792 : UseClass, UseIdx)) {
3793 : --UseCycle;
3794 : }
3795 : }
3796 :
3797 : return UseCycle;
3798 : }
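// Example of the calculation above: a def that completes in cycle 4
// (DefCycle == 4) feeding a use read in stage 2 (UseCycle == 2) yields an
// operand latency of 4 - 2 + 1 = 3 cycles, reduced by one more if the
// itinerary records a pipeline forwarding path between the two.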
3799 :
3800 425 : static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3801 : const MachineInstr *MI, unsigned Reg,
3802 : unsigned &DefIdx, unsigned &Dist) {
3803 425 : Dist = 0;
3804 :
3805 : MachineBasicBlock::const_iterator I = MI; ++I;
3806 : MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3807 : assert(II->isInsideBundle() && "Empty bundle?");
3808 :
3809 : int Idx = -1;
3810 438 : while (II->isInsideBundle()) {
3811 438 : Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3812 438 : if (Idx != -1)
3813 : break;
3814 : --II;
3815 13 : ++Dist;
3816 : }
3817 :
3818 : assert(Idx != -1 && "Cannot find bundled definition!");
3819 425 : DefIdx = Idx;
3820 425 : return &*II;
3821 : }
3822 :
3823 1294 : static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3824 : const MachineInstr &MI, unsigned Reg,
3825 : unsigned &UseIdx, unsigned &Dist) {
3826 1294 : Dist = 0;
3827 :
3828 1294 : MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
3829 : assert(II->isInsideBundle() && "Empty bundle?");
3830 1294 : MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
3831 :
3832 : // FIXME: This doesn't properly handle multiple uses.
3833 : int Idx = -1;
3834 2608 : while (II != E && II->isInsideBundle()) {
3835 2607 : Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3836 2607 : if (Idx != -1)
3837 : break;
3838 2628 : if (II->getOpcode() != ARM::t2IT)
3839 20 : ++Dist;
3840 : ++II;
3841 : }
3842 :
3843 1294 : if (Idx == -1) {
3844 1 : Dist = 0;
3845 1 : return nullptr;
3846 : }
3847 :
3848 1293 : UseIdx = Idx;
3849 1293 : return &*II;
3850 : }
3851 :
3852 : /// Return the number of cycles to add to (or subtract from) the static
3853 : /// itinerary based on the def opcode and alignment. The caller will ensure
3854 : /// that the adjusted latency is at least one cycle.
3855 159925 : static int adjustDefLatency(const ARMSubtarget &Subtarget,
3856 : const MachineInstr &DefMI,
3857 : const MCInstrDesc &DefMCID, unsigned DefAlign) {
3858 : int Adjust = 0;
3859 299026 : if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3860 : // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3861 : // variants are one cycle cheaper.
3862 44248 : switch (DefMCID.getOpcode()) {
3863 : default: break;
3864 58 : case ARM::LDRrs:
3865 : case ARM::LDRBrs: {
3866 58 : unsigned ShOpVal = DefMI.getOperand(3).getImm();
3867 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3868 58 : if (ShImm == 0 ||
3869 27 : (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3870 : --Adjust;
3871 : break;
3872 : }
3873 67 : case ARM::t2LDRs:
3874 : case ARM::t2LDRBs:
3875 : case ARM::t2LDRHs:
3876 : case ARM::t2LDRSHs: {
3877 : // Thumb2 mode: lsl only.
3878 67 : unsigned ShAmt = DefMI.getOperand(3).getImm();
3879 67 : if (ShAmt == 0 || ShAmt == 2)
3880 : --Adjust;
3881 : break;
3882 : }
3883 : }
3884 137801 : } else if (Subtarget.isSwift()) {
3885 : // FIXME: Properly handle all of the latency adjustments for address
3886 : // writeback.
3887 6 : switch (DefMCID.getOpcode()) {
3888 : default: break;
3889 0 : case ARM::LDRrs:
3890 : case ARM::LDRBrs: {
3891 0 : unsigned ShOpVal = DefMI.getOperand(3).getImm();
3892 : bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3893 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3894 0 : if (!isSub &&
3895 0 : (ShImm == 0 ||
3896 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3897 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3898 : Adjust -= 2;
3899 0 : else if (!isSub &&
3900 0 : ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3901 : --Adjust;
3902 : break;
3903 : }
3904 0 : case ARM::t2LDRs:
3905 : case ARM::t2LDRBs:
3906 : case ARM::t2LDRHs:
3907 : case ARM::t2LDRSHs: {
3908 : // Thumb2 mode: lsl only.
3909 0 : unsigned ShAmt = DefMI.getOperand(3).getImm();
3910 0 : if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3911 : Adjust -= 2;
3912 : break;
3913 : }
3914 : }
3915 : }
3916 :
3917 159925 : if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
3918 8098 : switch (DefMCID.getOpcode()) {
3919 : default: break;
3920 194 : case ARM::VLD1q8:
3921 : case ARM::VLD1q16:
3922 : case ARM::VLD1q32:
3923 : case ARM::VLD1q64:
3924 : case ARM::VLD1q8wb_fixed:
3925 : case ARM::VLD1q16wb_fixed:
3926 : case ARM::VLD1q32wb_fixed:
3927 : case ARM::VLD1q64wb_fixed:
3928 : case ARM::VLD1q8wb_register:
3929 : case ARM::VLD1q16wb_register:
3930 : case ARM::VLD1q32wb_register:
3931 : case ARM::VLD1q64wb_register:
3932 : case ARM::VLD2d8:
3933 : case ARM::VLD2d16:
3934 : case ARM::VLD2d32:
3935 : case ARM::VLD2q8:
3936 : case ARM::VLD2q16:
3937 : case ARM::VLD2q32:
3938 : case ARM::VLD2d8wb_fixed:
3939 : case ARM::VLD2d16wb_fixed:
3940 : case ARM::VLD2d32wb_fixed:
3941 : case ARM::VLD2q8wb_fixed:
3942 : case ARM::VLD2q16wb_fixed:
3943 : case ARM::VLD2q32wb_fixed:
3944 : case ARM::VLD2d8wb_register:
3945 : case ARM::VLD2d16wb_register:
3946 : case ARM::VLD2d32wb_register:
3947 : case ARM::VLD2q8wb_register:
3948 : case ARM::VLD2q16wb_register:
3949 : case ARM::VLD2q32wb_register:
3950 : case ARM::VLD3d8:
3951 : case ARM::VLD3d16:
3952 : case ARM::VLD3d32:
3953 : case ARM::VLD1d64T:
3954 : case ARM::VLD3d8_UPD:
3955 : case ARM::VLD3d16_UPD:
3956 : case ARM::VLD3d32_UPD:
3957 : case ARM::VLD1d64Twb_fixed:
3958 : case ARM::VLD1d64Twb_register:
3959 : case ARM::VLD3q8_UPD:
3960 : case ARM::VLD3q16_UPD:
3961 : case ARM::VLD3q32_UPD:
3962 : case ARM::VLD4d8:
3963 : case ARM::VLD4d16:
3964 : case ARM::VLD4d32:
3965 : case ARM::VLD1d64Q:
3966 : case ARM::VLD4d8_UPD:
3967 : case ARM::VLD4d16_UPD:
3968 : case ARM::VLD4d32_UPD:
3969 : case ARM::VLD1d64Qwb_fixed:
3970 : case ARM::VLD1d64Qwb_register:
3971 : case ARM::VLD4q8_UPD:
3972 : case ARM::VLD4q16_UPD:
3973 : case ARM::VLD4q32_UPD:
3974 : case ARM::VLD1DUPq8:
3975 : case ARM::VLD1DUPq16:
3976 : case ARM::VLD1DUPq32:
3977 : case ARM::VLD1DUPq8wb_fixed:
3978 : case ARM::VLD1DUPq16wb_fixed:
3979 : case ARM::VLD1DUPq32wb_fixed:
3980 : case ARM::VLD1DUPq8wb_register:
3981 : case ARM::VLD1DUPq16wb_register:
3982 : case ARM::VLD1DUPq32wb_register:
3983 : case ARM::VLD2DUPd8:
3984 : case ARM::VLD2DUPd16:
3985 : case ARM::VLD2DUPd32:
3986 : case ARM::VLD2DUPd8wb_fixed:
3987 : case ARM::VLD2DUPd16wb_fixed:
3988 : case ARM::VLD2DUPd32wb_fixed:
3989 : case ARM::VLD2DUPd8wb_register:
3990 : case ARM::VLD2DUPd16wb_register:
3991 : case ARM::VLD2DUPd32wb_register:
3992 : case ARM::VLD4DUPd8:
3993 : case ARM::VLD4DUPd16:
3994 : case ARM::VLD4DUPd32:
3995 : case ARM::VLD4DUPd8_UPD:
3996 : case ARM::VLD4DUPd16_UPD:
3997 : case ARM::VLD4DUPd32_UPD:
3998 : case ARM::VLD1LNd8:
3999 : case ARM::VLD1LNd16:
4000 : case ARM::VLD1LNd32:
4001 : case ARM::VLD1LNd8_UPD:
4002 : case ARM::VLD1LNd16_UPD:
4003 : case ARM::VLD1LNd32_UPD:
4004 : case ARM::VLD2LNd8:
4005 : case ARM::VLD2LNd16:
4006 : case ARM::VLD2LNd32:
4007 : case ARM::VLD2LNq16:
4008 : case ARM::VLD2LNq32:
4009 : case ARM::VLD2LNd8_UPD:
4010 : case ARM::VLD2LNd16_UPD:
4011 : case ARM::VLD2LNd32_UPD:
4012 : case ARM::VLD2LNq16_UPD:
4013 : case ARM::VLD2LNq32_UPD:
4014 : case ARM::VLD4LNd8:
4015 : case ARM::VLD4LNd16:
4016 : case ARM::VLD4LNd32:
4017 : case ARM::VLD4LNq16:
4018 : case ARM::VLD4LNq32:
4019 : case ARM::VLD4LNd8_UPD:
4020 : case ARM::VLD4LNd16_UPD:
4021 : case ARM::VLD4LNd32_UPD:
4022 : case ARM::VLD4LNq16_UPD:
4023 : case ARM::VLD4LNq32_UPD:
4024 : // If the address is not 64-bit aligned, the latencies of these
4025 : // instructions increase by one.
4026 194 : ++Adjust;
4027 194 : break;
4028 : }
4029 : }
4030 159925 : return Adjust;
4031 : }
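// Example: on Cortex-A8, "ldr r0, [r1, r2]" (LDRrs with a zero shift) is
// one cycle cheaper than its static itinerary, so, absent other
// adjustments, this returns -1; an under-aligned VLD2d8 on a core that
// checks VLDn access alignment costs one extra cycle, returning +1.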
4032 :
4033 58077 : int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4034 : const MachineInstr &DefMI,
4035 : unsigned DefIdx,
4036 : const MachineInstr &UseMI,
4037 : unsigned UseIdx) const {
4038 : // No operand latency. The caller may fall back to getInstrLatency.
4039 58077 : if (!ItinData || ItinData->isEmpty())
4040 : return -1;
4041 :
4042 58077 : const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4043 58077 : unsigned Reg = DefMO.getReg();
4044 :
4045 : const MachineInstr *ResolvedDefMI = &DefMI;
4046 58077 : unsigned DefAdj = 0;
4047 58077 : if (DefMI.isBundle())
4048 : ResolvedDefMI =
4049 425 : getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4050 58038 : if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4051 58020 : ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4052 : return 1;
4053 : }
4054 :
4055 : const MachineInstr *ResolvedUseMI = &UseMI;
4056 58018 : unsigned UseAdj = 0;
4057 58018 : if (UseMI.isBundle()) {
4058 : ResolvedUseMI =
4059 1294 : getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4060 1294 : if (!ResolvedUseMI)
4061 : return -1;
4062 : }
4063 :
4064 58017 : return getOperandLatencyImpl(
4065 : ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4066 58017 : Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4067 : }
4068 :
4069 58017 : int ARMBaseInstrInfo::getOperandLatencyImpl(
4070 : const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4071 : unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4072 : const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4073 : unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4074 58017 : if (Reg == ARM::CPSR) {
4075 4166 : if (DefMI.getOpcode() == ARM::FMSTAT) {
4076 : // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4077 457 : return Subtarget.isLikeA9() ? 1 : 20;
4078 : }
4079 :
4080 : // CPSR set and branch can be paired in the same cycle.
4081 1626 : if (UseMI.isBranch())
4082 : return 0;
4083 :
4084 : // Otherwise it takes the instruction latency (generally one).
4085 1626 : unsigned Latency = getInstrLatency(ItinData, DefMI);
4086 :
4087 : // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
4088 : // its uses. Instructions which are otherwise scheduled between them may
4089 : // incur a code size penalty (not able to use the CPSR setting 16-bit
4090 : // instructions).
4091 1626 : if (Latency > 0 && Subtarget.isThumb2()) {
4092 641 : const MachineFunction *MF = DefMI.getParent()->getParent();
4093 : // FIXME: Use Function::optForSize().
4094 641 : if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4095 28 : --Latency;
4096 : }
4097 1626 : return Latency;
4098 : }
4099 :
4100 55934 : if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4101 : return -1;
4102 :
4103 : unsigned DefAlign = DefMI.hasOneMemOperand()
4104 10635 : ? (*DefMI.memoperands_begin())->getAlignment()
4105 51794 : : 0;
4106 : unsigned UseAlign = UseMI.hasOneMemOperand()
4107 7766 : ? (*UseMI.memoperands_begin())->getAlignment()
4108 48925 : : 0;
4109 :
4110 : // Get the itinerary's latency if possible, and handle variable_ops.
4111 41159 : int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
4112 : UseIdx, UseAlign);
4113 : // Unable to find operand latency. The caller may resort to getInstrLatency.
4114 41159 : if (Latency < 0)
4115 : return Latency;
4116 :
4117 : // Adjust for IT block position.
4118 34299 : int Adj = DefAdj + UseAdj;
4119 :
4120 : // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4121 34299 : Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4122 34299 : if (Adj >= 0 || (int)Latency > -Adj) {
4123 34299 : return Latency + Adj;
4124 : }
4125 : // Return the itinerary latency, which may be zero but not less than zero.
4126 : return Latency;
4127 : }
4128 :
4129 : int
4130 73716 : ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4131 : SDNode *DefNode, unsigned DefIdx,
4132 : SDNode *UseNode, unsigned UseIdx) const {
4133 73716 : if (!DefNode->isMachineOpcode())
4134 : return 1;
4135 :
4136 51474 : const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4137 :
4138 51474 : if (isZeroCost(DefMCID.Opcode))
4139 : return 0;
4140 :
4141 44231 : if (!ItinData || ItinData->isEmpty())
4142 4074 : return DefMCID.mayLoad() ? 3 : 1;
4143 :
4144 42194 : if (!UseNode->isMachineOpcode()) {
4145 15146 : int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4146 15146 : int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4147 15146 : int Threshold = 1 + Adj;
4148 15146 : return Latency <= Threshold ? 1 : Latency - Adj;
4149 : }
4150 :
4151 : const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4152 : const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
4153 27048 : unsigned DefAlign = !DefMN->memoperands_empty()
4154 8129 : ? (*DefMN->memoperands_begin())->getAlignment() : 0;
4155 : const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
4156 27048 : unsigned UseAlign = !UseMN->memoperands_empty()
4157 5980 : ? (*UseMN->memoperands_begin())->getAlignment() : 0;
4158 27048 : int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
4159 : UseMCID, UseIdx, UseAlign);
4160 :
4161 27048 : if (Latency > 1 &&
4162 23567 : (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4163 : Subtarget.isCortexA7())) {
4164 : // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4165 : // variants are one cycle cheaper.
4166 3472 : switch (DefMCID.getOpcode()) {
4167 : default: break;
4168 9 : case ARM::LDRrs:
4169 : case ARM::LDRBrs: {
4170 : unsigned ShOpVal =
4171 27 : cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4172 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4173 9 : if (ShImm == 0 ||
4174 7 : (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4175 9 : --Latency;
4176 : break;
4177 : }
4178 18 : case ARM::t2LDRs:
4179 : case ARM::t2LDRBs:
4180 : case ARM::t2LDRHs:
4181 : case ARM::t2LDRSHs: {
4182 : // Thumb2 mode: lsl only.
4183 : unsigned ShAmt =
4184 36 : cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4185 18 : if (ShAmt == 0 || ShAmt == 2)
4186 17 : --Latency;
4187 : break;
4188 : }
4189 : }
4190 25312 : } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
4191 : // FIXME: Properly handle all of the latency adjustments for address
4192 : // writeback.
4193 0 : switch (DefMCID.getOpcode()) {
4194 : default: break;
4195 0 : case ARM::LDRrs:
4196 : case ARM::LDRBrs: {
4197 : unsigned ShOpVal =
4198 0 : cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
4199 : unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4200 0 : if (ShImm == 0 ||
4201 0 : ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4202 : ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4203 0 : Latency -= 2;
4204 0 : else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4205 0 : --Latency;
4206 : break;
4207 : }
4208 0 : case ARM::t2LDRs:
4209 : case ARM::t2LDRBs:
4210 : case ARM::t2LDRHs:
4211 : case ARM::t2LDRSHs:
4212 : // Thumb2 mode: lsl 0-3 only.
4213 0 : Latency -= 2;
4214 0 : break;
4215 : }
4216 : }
4217 :
4218 27048 : if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4219 1858 : switch (DefMCID.getOpcode()) {
4220 : default: break;
4221 19 : case ARM::VLD1q8:
4222 : case ARM::VLD1q16:
4223 : case ARM::VLD1q32:
4224 : case ARM::VLD1q64:
4225 : case ARM::VLD1q8wb_register:
4226 : case ARM::VLD1q16wb_register:
4227 : case ARM::VLD1q32wb_register:
4228 : case ARM::VLD1q64wb_register:
4229 : case ARM::VLD1q8wb_fixed:
4230 : case ARM::VLD1q16wb_fixed:
4231 : case ARM::VLD1q32wb_fixed:
4232 : case ARM::VLD1q64wb_fixed:
4233 : case ARM::VLD2d8:
4234 : case ARM::VLD2d16:
4235 : case ARM::VLD2d32:
4236 : case ARM::VLD2q8Pseudo:
4237 : case ARM::VLD2q16Pseudo:
4238 : case ARM::VLD2q32Pseudo:
4239 : case ARM::VLD2d8wb_fixed:
4240 : case ARM::VLD2d16wb_fixed:
4241 : case ARM::VLD2d32wb_fixed:
4242 : case ARM::VLD2q8PseudoWB_fixed:
4243 : case ARM::VLD2q16PseudoWB_fixed:
4244 : case ARM::VLD2q32PseudoWB_fixed:
4245 : case ARM::VLD2d8wb_register:
4246 : case ARM::VLD2d16wb_register:
4247 : case ARM::VLD2d32wb_register:
4248 : case ARM::VLD2q8PseudoWB_register:
4249 : case ARM::VLD2q16PseudoWB_register:
4250 : case ARM::VLD2q32PseudoWB_register:
4251 : case ARM::VLD3d8Pseudo:
4252 : case ARM::VLD3d16Pseudo:
4253 : case ARM::VLD3d32Pseudo:
4254 : case ARM::VLD1d8TPseudo:
4255 : case ARM::VLD1d16TPseudo:
4256 : case ARM::VLD1d32TPseudo:
4257 : case ARM::VLD1d64TPseudo:
4258 : case ARM::VLD1d64TPseudoWB_fixed:
4259 : case ARM::VLD1d64TPseudoWB_register:
4260 : case ARM::VLD3d8Pseudo_UPD:
4261 : case ARM::VLD3d16Pseudo_UPD:
4262 : case ARM::VLD3d32Pseudo_UPD:
4263 : case ARM::VLD3q8Pseudo_UPD:
4264 : case ARM::VLD3q16Pseudo_UPD:
4265 : case ARM::VLD3q32Pseudo_UPD:
4266 : case ARM::VLD3q8oddPseudo:
4267 : case ARM::VLD3q16oddPseudo:
4268 : case ARM::VLD3q32oddPseudo:
4269 : case ARM::VLD3q8oddPseudo_UPD:
4270 : case ARM::VLD3q16oddPseudo_UPD:
4271 : case ARM::VLD3q32oddPseudo_UPD:
4272 : case ARM::VLD4d8Pseudo:
4273 : case ARM::VLD4d16Pseudo:
4274 : case ARM::VLD4d32Pseudo:
4275 : case ARM::VLD1d8QPseudo:
4276 : case ARM::VLD1d16QPseudo:
4277 : case ARM::VLD1d32QPseudo:
4278 : case ARM::VLD1d64QPseudo:
4279 : case ARM::VLD1d64QPseudoWB_fixed:
4280 : case ARM::VLD1d64QPseudoWB_register:
4281 : case ARM::VLD1q8HighQPseudo:
4282 : case ARM::VLD1q8LowQPseudo_UPD:
4283 : case ARM::VLD1q8HighTPseudo:
4284 : case ARM::VLD1q8LowTPseudo_UPD:
4285 : case ARM::VLD1q16HighQPseudo:
4286 : case ARM::VLD1q16LowQPseudo_UPD:
4287 : case ARM::VLD1q16HighTPseudo:
4288 : case ARM::VLD1q16LowTPseudo_UPD:
4289 : case ARM::VLD1q32HighQPseudo:
4290 : case ARM::VLD1q32LowQPseudo_UPD:
4291 : case ARM::VLD1q32HighTPseudo:
4292 : case ARM::VLD1q32LowTPseudo_UPD:
4293 : case ARM::VLD1q64HighQPseudo:
4294 : case ARM::VLD1q64LowQPseudo_UPD:
4295 : case ARM::VLD1q64HighTPseudo:
4296 : case ARM::VLD1q64LowTPseudo_UPD:
4297 : case ARM::VLD4d8Pseudo_UPD:
4298 : case ARM::VLD4d16Pseudo_UPD:
4299 : case ARM::VLD4d32Pseudo_UPD:
4300 : case ARM::VLD4q8Pseudo_UPD:
4301 : case ARM::VLD4q16Pseudo_UPD:
4302 : case ARM::VLD4q32Pseudo_UPD:
4303 : case ARM::VLD4q8oddPseudo:
4304 : case ARM::VLD4q16oddPseudo:
4305 : case ARM::VLD4q32oddPseudo:
4306 : case ARM::VLD4q8oddPseudo_UPD:
4307 : case ARM::VLD4q16oddPseudo_UPD:
4308 : case ARM::VLD4q32oddPseudo_UPD:
4309 : case ARM::VLD1DUPq8:
4310 : case ARM::VLD1DUPq16:
4311 : case ARM::VLD1DUPq32:
4312 : case ARM::VLD1DUPq8wb_fixed:
4313 : case ARM::VLD1DUPq16wb_fixed:
4314 : case ARM::VLD1DUPq32wb_fixed:
4315 : case ARM::VLD1DUPq8wb_register:
4316 : case ARM::VLD1DUPq16wb_register:
4317 : case ARM::VLD1DUPq32wb_register:
4318 : case ARM::VLD2DUPd8:
4319 : case ARM::VLD2DUPd16:
4320 : case ARM::VLD2DUPd32:
4321 : case ARM::VLD2DUPd8wb_fixed:
4322 : case ARM::VLD2DUPd16wb_fixed:
4323 : case ARM::VLD2DUPd32wb_fixed:
4324 : case ARM::VLD2DUPd8wb_register:
4325 : case ARM::VLD2DUPd16wb_register:
4326 : case ARM::VLD2DUPd32wb_register:
4327 : case ARM::VLD2DUPq8EvenPseudo:
4328 : case ARM::VLD2DUPq8OddPseudo:
4329 : case ARM::VLD2DUPq16EvenPseudo:
4330 : case ARM::VLD2DUPq16OddPseudo:
4331 : case ARM::VLD2DUPq32EvenPseudo:
4332 : case ARM::VLD2DUPq32OddPseudo:
4333 : case ARM::VLD3DUPq8EvenPseudo:
4334 : case ARM::VLD3DUPq8OddPseudo:
4335 : case ARM::VLD3DUPq16EvenPseudo:
4336 : case ARM::VLD3DUPq16OddPseudo:
4337 : case ARM::VLD3DUPq32EvenPseudo:
4338 : case ARM::VLD3DUPq32OddPseudo:
4339 : case ARM::VLD4DUPd8Pseudo:
4340 : case ARM::VLD4DUPd16Pseudo:
4341 : case ARM::VLD4DUPd32Pseudo:
4342 : case ARM::VLD4DUPd8Pseudo_UPD:
4343 : case ARM::VLD4DUPd16Pseudo_UPD:
4344 : case ARM::VLD4DUPd32Pseudo_UPD:
4345 : case ARM::VLD4DUPq8EvenPseudo:
4346 : case ARM::VLD4DUPq8OddPseudo:
4347 : case ARM::VLD4DUPq16EvenPseudo:
4348 : case ARM::VLD4DUPq16OddPseudo:
4349 : case ARM::VLD4DUPq32EvenPseudo:
4350 : case ARM::VLD4DUPq32OddPseudo:
4351 : case ARM::VLD1LNq8Pseudo:
4352 : case ARM::VLD1LNq16Pseudo:
4353 : case ARM::VLD1LNq32Pseudo:
4354 : case ARM::VLD1LNq8Pseudo_UPD:
4355 : case ARM::VLD1LNq16Pseudo_UPD:
4356 : case ARM::VLD1LNq32Pseudo_UPD:
4357 : case ARM::VLD2LNd8Pseudo:
4358 : case ARM::VLD2LNd16Pseudo:
4359 : case ARM::VLD2LNd32Pseudo:
4360 : case ARM::VLD2LNq16Pseudo:
4361 : case ARM::VLD2LNq32Pseudo:
4362 : case ARM::VLD2LNd8Pseudo_UPD:
4363 : case ARM::VLD2LNd16Pseudo_UPD:
4364 : case ARM::VLD2LNd32Pseudo_UPD:
4365 : case ARM::VLD2LNq16Pseudo_UPD:
4366 : case ARM::VLD2LNq32Pseudo_UPD:
4367 : case ARM::VLD4LNd8Pseudo:
4368 : case ARM::VLD4LNd16Pseudo:
4369 : case ARM::VLD4LNd32Pseudo:
4370 : case ARM::VLD4LNq16Pseudo:
4371 : case ARM::VLD4LNq32Pseudo:
4372 : case ARM::VLD4LNd8Pseudo_UPD:
4373 : case ARM::VLD4LNd16Pseudo_UPD:
4374 : case ARM::VLD4LNd32Pseudo_UPD:
4375 : case ARM::VLD4LNq16Pseudo_UPD:
4376 : case ARM::VLD4LNq32Pseudo_UPD:
4377 : // If the address is not 64-bit aligned, the latencies of these
4378 : // instructions increase by one.
4379 19 : ++Latency;
4380 19 : break;
4381 : }
4382 :
4383 : return Latency;
4384 : }
4385 :
4386 42555 : unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4387 42555 : if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4388 : MI.isImplicitDef())
4389 : return 0;
4390 :
4391 42540 : if (MI.isBundle())
4392 : return 0;
4393 :
4394 : const MCInstrDesc &MCID = MI.getDesc();
4395 :
4396 85006 : if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4397 2318 : !Subtarget.cheapPredicableCPSRDef())) {
4398 : // When predicated, CPSR is an additional source operand for CPSR-updating
4399 : // instructions; this apparently increases their latencies.
4400 3455 : return 1;
4401 : }
4402 : return 0;
4403 : }
4404 :
4405 131779 : unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4406 : const MachineInstr &MI,
4407 : unsigned *PredCost) const {
4408 131752 : if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4409 : MI.isImplicitDef())
4410 : return 1;
4411 :
4412 : // An instruction scheduler typically runs on unbundled instructions; however,
4413 : // other passes may query the latency of a bundled instruction.
4414 131697 : if (MI.isBundle()) {
4415 : unsigned Latency = 0;
4416 1614 : MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4417 1614 : MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4418 5030 : while (++I != E && I->isInsideBundle()) {
4419 6832 : if (I->getOpcode() != ARM::t2IT)
4420 1963 : Latency += getInstrLatency(ItinData, *I, PredCost);
4421 : }
4422 : return Latency;
4423 : }
4424 :
4425 : const MCInstrDesc &MCID = MI.getDesc();
4426 130083 : if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4427 0 : !Subtarget.cheapPredicableCPSRDef()))) {
4428 : // When predicated, CPSR is an additional source operand for CPSR-updating
4429 : // instructions; this apparently increases their latencies.
4430 0 : *PredCost = 1;
4431 : }
4432 : // Be sure to call getStageLatency for an empty itinerary in case it has a
4433 : // valid MinLatency property.
4434 130083 : if (!ItinData)
4435 0 : return MI.mayLoad() ? 3 : 1;
4436 :
4437 130083 : unsigned Class = MCID.getSchedClass();
4438 :
4439 : // For instructions with variable uops, use uops as latency.
4440 130083 : if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4441 4457 : return getNumMicroOps(ItinData, MI);
4442 :
4443 : // For the common case, fall back on the itinerary's latency.
4444 125626 : unsigned Latency = ItinData->getStageLatency(Class);
4445 :
4446 : // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4447 : unsigned DefAlign =
4448 153588 : MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
4449 125626 : int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4450 125626 : if (Adj >= 0 || (int)Latency > -Adj) {
4451 125595 : return Latency + Adj;
4452 : }
4453 : return Latency;
4454 : }
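// Example: for a Thumb2 bundle "it eq; addeq r0, r1; ldreq r2, [r3]" the
// reported latency is the sum of the ADD and LDR latencies; the t2IT
// instruction itself is skipped.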
4455 :
4456 67150 : int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4457 : SDNode *Node) const {
4458 67150 : if (!Node->isMachineOpcode())
4459 : return 1;
4460 :
4461 67150 : if (!ItinData || ItinData->isEmpty())
4462 : return 1;
4463 :
4464 : unsigned Opcode = Node->getMachineOpcode();
4465 67150 : switch (Opcode) {
4466 67148 : default:
4467 201444 : return ItinData->getStageLatency(get(Opcode).getSchedClass());
4468 : case ARM::VLDMQIA:
4469 : case ARM::VSTMQIA:
4470 : return 2;
4471 : }
4472 : }
4473 :
4474 88 : bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4475 : const MachineRegisterInfo *MRI,
4476 : const MachineInstr &DefMI,
4477 : unsigned DefIdx,
4478 : const MachineInstr &UseMI,
4479 : unsigned UseIdx) const {
4480 88 : unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4481 88 : unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4482 88 : if (Subtarget.nonpipelinedVFP() &&
4483 20 : (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4484 : return true;
4485 :
4486 : // Hoist VFP / NEON instructions with 4 or higher latency.
4487 : unsigned Latency =
4488 88 : SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4489 88 : if (Latency <= 3)
4490 : return false;
4491 19 : return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4492 19 : UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4493 : }
4494 :
4495 444 : bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4496 : const MachineInstr &DefMI,
4497 : unsigned DefIdx) const {
4498 : const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4499 428 : if (!ItinData || ItinData->isEmpty())
4500 : return false;
4501 :
4502 428 : unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4503 428 : if (DDomain == ARMII::DomainGeneral) {
4504 386 : unsigned DefClass = DefMI.getDesc().getSchedClass();
4505 : int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
4506 386 : return (DefCycle != -1 && DefCycle <= 2);
4507 : }
4508 : return false;
4509 : }
4510 :
4511 1314003 : bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4512 : StringRef &ErrInfo) const {
4513 2628006 : if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4514 0 : ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4515 0 : return false;
4516 : }
4517 : return true;
4518 : }
4519 :
4520 : // LoadStackGuard has so far only been implemented for MachO. A different
4521 : // code sequence is needed for other targets.
4522 50 : void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4523 : unsigned LoadImmOpc,
4524 : unsigned LoadOpc) const {
4525 : assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4526 : "ROPI/RWPI not currently supported with stack guard");
4527 :
4528 50 : MachineBasicBlock &MBB = *MI->getParent();
4529 : DebugLoc DL = MI->getDebugLoc();
4530 50 : unsigned Reg = MI->getOperand(0).getReg();
4531 : const GlobalValue *GV =
4532 50 : cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4533 50 : MachineInstrBuilder MIB;
4534 :
4535 100 : BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4536 : .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
4537 :
4538 50 : if (Subtarget.isGVIndirectSymbol(GV)) {
4539 84 : MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4540 42 : MIB.addReg(Reg, RegState::Kill).addImm(0);
4541 : auto Flags = MachineMemOperand::MOLoad |
4542 : MachineMemOperand::MODereferenceable |
4543 : MachineMemOperand::MOInvariant;
4544 84 : MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4545 : MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
4546 42 : MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4547 : }
4548 :
4549 100 : MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4550 50 : MIB.addReg(Reg, RegState::Kill)
4551 : .addImm(0)
4552 : .cloneMemRefs(*MI)
4553 50 : .add(predOps(ARMCC::AL));
4554 50 : }
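 : 
 : // On a MachO target the expansion above produces, roughly (a sketch; the
 : // exact opcodes come from LoadImmOpc/LoadOpc, and the middle load is only
 : // emitted when the guard symbol is accessed indirectly):
 : //
 : //   movw/movt Reg, __stack_chk_guard(non-lazy)
 : //   ldr Reg, [Reg]    @ indirect (non-lazy pointer) case only
 : //   ldr Reg, [Reg]    @ load the guard value itself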
4555 :
4556 : bool
4557 578 : ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4558 : unsigned &AddSubOpc,
4559 : bool &NegAcc, bool &HasLane) const {
4560 578 : DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4561 578 : if (I == MLxEntryMap.end())
4562 : return false;
4563 :
4564 16 : const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4565 16 : MulOpc = Entry.MulOpc;
4566 16 : AddSubOpc = Entry.AddSubOpc;
4567 16 : NegAcc = Entry.NegAcc;
4568 16 : HasLane = Entry.HasLane;
4569 16 : return true;
4570 : }
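 : 
 : // For illustration, a caller expanding an MLx instruction might use the hook
 : // above roughly as follows (a minimal sketch; instruction emission elided):
 : //
 : //   unsigned MulOpc = 0, AddSubOpc = 0;
 : //   bool NegAcc = false, HasLane = false;
 : //   if (TII->isFpMLxInstruction(MI.getOpcode(), MulOpc, AddSubOpc, NegAcc,
 : //                               HasLane)) {
 : //     // Emit the multiply (MulOpc), then the add / sub (AddSubOpc),
 : //     // negating the accumulator first when NegAcc is set.
 : //   }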
4571 :
4572 : //===----------------------------------------------------------------------===//
4573 : // Execution domains.
4574 : //===----------------------------------------------------------------------===//
4575 : //
4576 : // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4577 : // and some can go down both. The vmov instructions go down the VFP pipeline,
4578 : // but they can be changed to vorr equivalents that are executed by the NEON
4579 : // pipeline.
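 : //
 : // For example, an unpredicated copy such as
 : //
 : //   vmov d0, d1        @ VFP pipeline
 : //
 : // can be rewritten by setExecutionDomain below as
 : //
 : //   vorr d0, d1, d1    @ NEON pipeline, same result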
4580 : //
4581 : // We use the following execution domain numbering:
4582 : //
4583 : enum ARMExeDomain {
4584 : ExeGeneric = 0,
4585 : ExeVFP = 1,
4586 : ExeNEON = 2
4587 : };
4588 :
4589 : //
4590 : // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4591 : //
4592 : std::pair<uint16_t, uint16_t>
4593 98592 : ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4594 : // If we don't have access to NEON instructions then we won't be able
4595 : // to swizzle anything to the NEON domain. Check to make sure.
4596 98592 : if (Subtarget.hasNEON()) {
4597 : // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4598 : // if they are not predicated.
4599 116270 : if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4600 305 : return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4601 :
4602 : // CortexA9 is particularly picky about mixing the two and wants these
4603 : // converted.
4604 57830 : if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4605 1494 : (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4606 : MI.getOpcode() == ARM::VMOVS))
4607 153 : return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4608 : }
4609 : // No other instructions can be swizzled, so just determine their domain.
4610 98134 : unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4611 :
4612 98134 : if (Domain & ARMII::DomainNEON)
4613 17971 : return std::make_pair(ExeNEON, 0);
4614 :
4615 : // Certain instructions can go either way on Cortex-A8.
4616 : // Treat them as NEON instructions.
4617 80163 : if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4618 128 : return std::make_pair(ExeNEON, 0);
4619 :
4620 80035 : if (Domain & ARMII::DomainVFP)
4621 5126 : return std::make_pair(ExeVFP, 0);
4622 :
4623 74909 : return std::make_pair(ExeGeneric, 0);
4624 : }
4625 :
4626 69 : static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4627 : unsigned SReg, unsigned &Lane) {
4628 69 : unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4629 69 : Lane = 0;
4630 :
4631 69 : if (DReg != ARM::NoRegister)
4632 : return DReg;
4633 :
4634 24 : Lane = 1;
4635 24 : DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4636 :
4637 : assert(DReg && "S-register with no D super-register?");
4638 24 : return DReg;
4639 : }
4640 :
4641 : /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4642 : /// set ImplicitSReg to a register number that must be marked as implicit-use,
4643 : /// or zero if no register needs to be marked.
4644 : ///
4645 : /// If the function cannot determine whether an SPR should be marked as
4646 : /// implicit-use, it returns false.
4647 : ///
4648 : /// This function handles cases where an instruction is being modified to take
4649 : /// DPR[Lane] instead of an SPR. A use of the DPR is being added, which may
4650 : /// conflict with an earlier def of the SPR corresponding to DPR[Lane^1]
4651 : /// (i.e. the other lane of the DPR).
4652 : ///
4653 : /// If the other SPR is defined, an implicit-use of it should be added.
4654 : /// Otherwise (including the case where the DPR itself is defined), it should not.
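 : ///
 : /// For example, when "%s1 = VMOVSR %r0" is rewritten below to a VSETLNi32 on
 : /// lane 1 of %d0, the new instruction reads %d0; if %s0 (the other lane) was
 : /// defined earlier, its value must be kept live by adding an implicit-use of
 : /// %s0.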
4655 : ///
4656 21 : static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4657 : MachineInstr &MI, unsigned DReg,
4658 : unsigned Lane, unsigned &ImplicitSReg) {
4659 : // If the DPR is defined or used already, the other SPR lane will be chained
4660 : // correctly, so there is nothing to be done.
4661 31 : if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4662 13 : ImplicitSReg = 0;
4663 13 : return true;
4664 : }
4665 :
4666 : // Otherwise we need to go searching to see if the SPR is set explicitly.
4667 8 : ImplicitSReg = TRI->getSubReg(DReg,
4668 8 : (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4669 : MachineBasicBlock::LivenessQueryResult LQR =
4670 16 : MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4671 :
4672 8 : if (LQR == MachineBasicBlock::LQR_Live)
4673 : return true;
4674 8 : else if (LQR == MachineBasicBlock::LQR_Unknown)
4675 : return false;
4676 :
4677 : // If the register is known not to be live, there is no need to add an
4678 : // implicit-use.
4679 8 : ImplicitSReg = 0;
4680 8 : return true;
4681 : }
4682 :
4683 458 : void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4684 : unsigned Domain) const {
4685 : unsigned DstReg, SrcReg, DReg;
4686 : unsigned Lane;
4687 458 : MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4688 458 : const TargetRegisterInfo *TRI = &getRegisterInfo();
4689 916 : switch (MI.getOpcode()) {
4690 0 : default:
4691 0 : llvm_unreachable("cannot handle opcode!");
4692 : break;
4693 305 : case ARM::VMOVD:
4694 305 : if (Domain != ExeNEON)
4695 : break;
4696 :
4697 : // Zap the predicate operands.
4698 : assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4699 :
4700 : // Make sure we've got NEON instructions.
4701 : assert(Subtarget.hasNEON() && "VORRd requires NEON");
4702 :
4703 : // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4704 180 : DstReg = MI.getOperand(0).getReg();
4705 180 : SrcReg = MI.getOperand(1).getReg();
4706 :
4707 900 : for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4708 900 : MI.RemoveOperand(i - 1);
4709 :
4710 : // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4711 180 : MI.setDesc(get(ARM::VORRd));
4712 180 : MIB.addReg(DstReg, RegState::Define)
4713 180 : .addReg(SrcReg)
4714 180 : .addReg(SrcReg)
4715 180 : .add(predOps(ARMCC::AL));
4716 180 : break;
4717 59 : case ARM::VMOVRS:
4718 59 : if (Domain != ExeNEON)
4719 : break;
4720 : assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4721 :
4722 : // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4723 37 : DstReg = MI.getOperand(0).getReg();
4724 37 : SrcReg = MI.getOperand(1).getReg();
4725 :
4726 185 : for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4727 185 : MI.RemoveOperand(i - 1);
4728 :
4729 37 : DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4730 :
4731 : // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4732 : // Note that DSrc has been widened and the other lane may be undef, which
4733 : // contaminates the entire register.
4734 37 : MI.setDesc(get(ARM::VGETLNi32));
4735 37 : MIB.addReg(DstReg, RegState::Define)
4736 37 : .addReg(DReg, RegState::Undef)
4737 37 : .addImm(Lane)
4738 37 : .add(predOps(ARMCC::AL));
4739 :
4740 : // The old source should be an implicit use, otherwise we might think it
4741 : // was dead before here.
4742 37 : MIB.addReg(SrcReg, RegState::Implicit);
4743 37 : break;
4744 80 : case ARM::VMOVSR: {
4745 80 : if (Domain != ExeNEON)
4746 : break;
4747 : assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4748 :
4749 : // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4750 10 : DstReg = MI.getOperand(0).getReg();
4751 10 : SrcReg = MI.getOperand(1).getReg();
4752 :
4753 10 : DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4754 :
4755 : unsigned ImplicitSReg;
4756 10 : if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4757 : break;
4758 :
4759 50 : for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4760 40 : MI.RemoveOperand(i - 1);
4761 :
4762 : // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4763 : // Again DDst may be undefined at the beginning of this instruction.
4764 10 : MI.setDesc(get(ARM::VSETLNi32));
4765 10 : MIB.addReg(DReg, RegState::Define)
4766 10 : .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
4767 10 : .addReg(SrcReg)
4768 10 : .addImm(Lane)
4769 10 : .add(predOps(ARMCC::AL));
4770 :
4771 : // The narrower destination must be marked as set to keep previous chains
4772 : // in place.
4773 10 : MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4774 10 : if (ImplicitSReg != 0)
4775 0 : MIB.addReg(ImplicitSReg, RegState::Implicit);
4776 : break;
4777 : }
4778 14 : case ARM::VMOVS: {
4779 14 : if (Domain != ExeNEON)
4780 : break;
4781 :
4782 : // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4783 11 : DstReg = MI.getOperand(0).getReg();
4784 11 : SrcReg = MI.getOperand(1).getReg();
4785 :
4786 11 : unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4787 11 : DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4788 11 : DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4789 :
4790 : unsigned ImplicitSReg;
4791 11 : if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4792 : break;
4793 :
4794 55 : for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
4795 44 : MI.RemoveOperand(i - 1);
4796 :
4797 11 : if (DSrc == DDst) {
4798 : // Destination can be:
4799 : // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4800 1 : MI.setDesc(get(ARM::VDUPLN32d));
4801 1 : MIB.addReg(DDst, RegState::Define)
4802 1 : .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
4803 1 : .addImm(SrcLane)
4804 1 : .add(predOps(ARMCC::AL));
4805 :
4806 : // Neither the source nor the destination is naturally represented any
4807 : // more, so add them back manually.
4808 1 : MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4809 1 : MIB.addReg(SrcReg, RegState::Implicit);
4810 1 : if (ImplicitSReg != 0)
4811 0 : MIB.addReg(ImplicitSReg, RegState::Implicit);
4812 : break;
4813 : }
4814 :
4815 : // In general there's no single instruction that can perform an S <-> S
4816 : // move in NEON space, but a pair of VEXT instructions *can* do the
4817 : // job. It turns out that the VEXTs needed will only use DSrc once, with
4818 : // the position based purely on the combination of lane-0 and lane-1
4819 : // involved. For example
4820 : // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
4821 : // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
4822 : // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
4823 : // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
4824 : //
4825 : // Pattern of the MachineInstrs is:
4826 : // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4827 10 : MachineInstrBuilder NewMIB;
4828 10 : NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
4829 20 : DDst);
4830 :
4831 : // On the first instruction, both DSrc and DDst may be undef if present.
4832 : // Specifically, this happens when the original instruction didn't have them
4833 : // as an <imp-use>.
4834 10 : unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4835 : bool CurUndef = !MI.readsRegister(CurReg, TRI);
4836 10 : NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4837 :
4838 10 : CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4839 : CurUndef = !MI.readsRegister(CurReg, TRI);
4840 10 : NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
4841 : .addImm(1)
4842 10 : .add(predOps(ARMCC::AL));
4843 :
4844 10 : if (SrcLane == DstLane)
4845 7 : NewMIB.addReg(SrcReg, RegState::Implicit);
4846 :
4847 10 : MI.setDesc(get(ARM::VEXTd32));
4848 10 : MIB.addReg(DDst, RegState::Define);
4849 :
4850 : // On the second instruction, DDst has definitely been defined above, so
4851 : // it is not undef. DSrc, if present, can be undef as above.
4852 10 : CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4853 11 : CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4854 10 : MIB.addReg(CurReg, getUndefRegState(CurUndef));
4855 :
4856 10 : CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4857 12 : CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4858 10 : MIB.addReg(CurReg, getUndefRegState(CurUndef))
4859 : .addImm(1)
4860 10 : .add(predOps(ARMCC::AL));
4861 :
4862 10 : if (SrcLane != DstLane)
4863 3 : MIB.addReg(SrcReg, RegState::Implicit);
4864 :
4865 : // As before, the original destination is no longer represented, add it
4866 : // implicitly.
4867 10 : MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4868 10 : if (ImplicitSReg != 0)
4869 0 : MIB.addReg(ImplicitSReg, RegState::Implicit);
4870 : break;
4871 : }
4872 : }
4873 458 : }
4874 :
4875 : //===----------------------------------------------------------------------===//
4876 : // Partial register updates
4877 : //===----------------------------------------------------------------------===//
4878 : //
4879 : // Swift renames NEON registers with 64-bit granularity. That means any
4880 : // instruction writing an S-reg implicitly reads the containing D-reg. The
4881 : // problem is mostly avoided by translating f32 operations to v2f32 operations
4882 : // on D-registers, but f32 loads are still a problem.
4883 : //
4884 : // These instructions can load an f32 into a NEON register:
4885 : //
4886 : // VLDRS - Only writes S, partial D update.
4887 : // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4888 : // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4889 : //
4890 : // FCONSTD can be used as a dependency-breaking instruction.
4891 102661 : unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4892 : const MachineInstr &MI, unsigned OpNum,
4893 : const TargetRegisterInfo *TRI) const {
4894 102661 : auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4895 102661 : if (!PartialUpdateClearance)
4896 : return 0;
4897 :
4898 : assert(TRI && "Need TRI instance");
4899 :
4900 2894 : const MachineOperand &MO = MI.getOperand(OpNum);
4901 : if (MO.readsReg())
4902 : return 0;
4903 2894 : unsigned Reg = MO.getReg();
4904 : int UseOp = -1;
4905 :
4906 5788 : switch (MI.getOpcode()) {
4907 : // Normal instructions writing only an S-register.
4908 35 : case ARM::VLDRS:
4909 : case ARM::FCONSTS:
4910 : case ARM::VMOVSR:
4911 : case ARM::VMOVv8i8:
4912 : case ARM::VMOVv4i16:
4913 : case ARM::VMOVv2i32:
4914 : case ARM::VMOVv2f32:
4915 : case ARM::VMOVv1i64:
4916 35 : UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4917 : break;
4918 :
4919 : // Explicitly reads the dependency.
4920 : case ARM::VLD1LNd32:
4921 : UseOp = 3;
4922 : break;
4923 : default:
4924 : return 0;
4925 : }
4926 :
4927 : // If this instruction actually reads a value from Reg, there is no unwanted
4928 : // dependency.
4929 39 : if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4930 : return 0;
4931 :
4932 : // We must be able to clobber the whole D-reg.
4933 38 : if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4934 : // A virtual register must be a 'def, undef foo:ssub_0' operand.
4935 0 : if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4936 0 : return 0;
4937 38 : } else if (ARM::SPRRegClass.contains(Reg)) {
4938 : // Physical register: MI must define the full D-reg.
4939 17 : unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4940 : &ARM::DPRRegClass);
4941 34 : if (!DReg || !MI.definesRegister(DReg, TRI))
4942 14 : return 0;
4943 : }
4944 :
4945 : // MI has an unwanted D-register dependency.
4946 : // Avoid defs in the previous N instructions.
4947 : return PartialUpdateClearance;
4948 : }
4949 :
4950 : // Break a partial register dependency after getPartialRegUpdateClearance
4951 : // returned non-zero.
4952 3 : void ARMBaseInstrInfo::breakPartialRegDependency(
4953 : MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4954 : assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
4955 : assert(TRI && "Need TRI instance");
4956 :
4957 3 : const MachineOperand &MO = MI.getOperand(OpNum);
4958 3 : unsigned Reg = MO.getReg();
4959 : assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4960 : "Can't break virtual register dependencies.");
4961 : unsigned DReg = Reg;
4962 :
4963 : // If MI defines an S-reg, find the corresponding D super-register.
4964 3 : if (ARM::SPRRegClass.contains(Reg)) {
4965 0 : DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4966 : assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4967 : }
4968 :
4969 : assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4970 : assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4971 :
4972 : // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4973 : // the full D-register by loading the same value to both lanes. The
4974 : // instruction is micro-coded with 2 uops, so don't do this until we can
4975 : // properly schedule micro-coded instructions. The dispatcher stalls cause
4976 : // too big regressions.
4977 :
4978 : // Insert the dependency-breaking FCONSTD before MI.
4979 : // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4980 6 : BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
4981 : .addImm(96)
4982 3 : .add(predOps(ARMCC::AL));
4983 3 : MI.addRegisterKilled(DReg, TRI, true);
4984 3 : }
4985 :
4986 16 : bool ARMBaseInstrInfo::hasNOP() const {
4987 32 : return Subtarget.getFeatureBits()[ARM::HasV6KOps];
4988 : }
4989 :
4990 110 : bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4991 110 : if (MI->getNumOperands() < 4)
4992 : return true;
4993 110 : unsigned ShOpVal = MI->getOperand(3).getImm();
4994 : unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4995 : // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4996 110 : if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4997 110 : ((ShImm == 1 || ShImm == 2) &&
4998 : ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4999 20 : return true;
5000 :
5001 : return false;
5002 : }
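 : 
 : // For example, "add r0, r1, r2, lsl #2" is one of Swift's fast shift forms,
 : // while "add r0, r1, r2, lsl #3" is not.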
5003 :
5004 1595 : bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5005 : const MachineInstr &MI, unsigned DefIdx,
5006 : SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5007 : assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5008 : assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5009 :
5010 3190 : switch (MI.getOpcode()) {
5011 1595 : case ARM::VMOVDRR:
5012 : // dX = VMOVDRR rY, rZ
5013 : // is the same as:
5014 : // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5015 : // Populate the InputRegs accordingly.
5016 : // rY
5017 1595 : const MachineOperand *MOReg = &MI.getOperand(1);
5018 1595 : if (!MOReg->isUndef())
5019 4785 : InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5020 : MOReg->getSubReg(), ARM::ssub_0));
5021 : // rZ
5022 1595 : MOReg = &MI.getOperand(2);
5023 1595 : if (!MOReg->isUndef())
5024 4785 : InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5025 : MOReg->getSubReg(), ARM::ssub_1));
5026 1595 : return true;
5027 : }
5028 0 : llvm_unreachable("Target dependent opcode missing");
5029 : }
5030 :
5031 3121 : bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5032 : const MachineInstr &MI, unsigned DefIdx,
5033 : RegSubRegPairAndIdx &InputReg) const {
5034 : assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5035 : assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5036 :
5037 6242 : switch (MI.getOpcode()) {
5038 3121 : case ARM::VMOVRRD:
5039 : // rX, rY = VMOVRRD dZ
5040 : // is the same as:
5041 : // rX = EXTRACT_SUBREG dZ, ssub_0
5042 : // rY = EXTRACT_SUBREG dZ, ssub_1
5043 3121 : const MachineOperand &MOReg = MI.getOperand(2);
5044 3121 : if (MOReg.isUndef())
5045 : return false;
5046 3121 : InputReg.Reg = MOReg.getReg();
5047 3121 : InputReg.SubReg = MOReg.getSubReg();
5048 3121 : InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5049 3121 : return true;
5050 : }
5051 0 : llvm_unreachable("Target dependent opcode missing");
5052 : }
5053 :
5054 193 : bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5055 : const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5056 : RegSubRegPairAndIdx &InsertedReg) const {
5057 : assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5058 : assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5059 :
5060 386 : switch (MI.getOpcode()) {
5061 193 : case ARM::VSETLNi32:
5062 : // dX = VSETLNi32 dY, rZ, imm
5063 193 : const MachineOperand &MOBaseReg = MI.getOperand(1);
5064 : const MachineOperand &MOInsertedReg = MI.getOperand(2);
5065 193 : if (MOInsertedReg.isUndef())
5066 : return false;
5067 : const MachineOperand &MOIndex = MI.getOperand(3);
5068 193 : BaseReg.Reg = MOBaseReg.getReg();
5069 193 : BaseReg.SubReg = MOBaseReg.getSubReg();
5070 :
5071 193 : InsertedReg.Reg = MOInsertedReg.getReg();
5072 193 : InsertedReg.SubReg = MOInsertedReg.getSubReg();
5073 193 : InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
5074 193 : return true;
5075 : }
5076 0 : llvm_unreachable("Target dependent opcode missing");
5077 : }
5078 :
5079 : std::pair<unsigned, unsigned>
5080 258 : ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5081 : const unsigned Mask = ARMII::MO_OPTION_MASK;
5082 258 : return std::make_pair(TF & Mask, TF & ~Mask);
5083 : }
5084 :
5085 : ArrayRef<std::pair<unsigned, const char *>>
5086 243 : ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5087 : using namespace ARMII;
5088 :
5089 : static const std::pair<unsigned, const char *> TargetFlags[] = {
5090 : {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
5091 243 : return makeArrayRef(TargetFlags);
5092 : }
5093 :
5094 : ArrayRef<std::pair<unsigned, const char *>>
5095 16 : ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5096 : using namespace ARMII;
5097 :
5098 : static const std::pair<unsigned, const char *> TargetFlags[] = {
5099 : {MO_COFFSTUB, "arm-coffstub"},
5100 : {MO_GOT, "arm-got"},
5101 : {MO_SBREL, "arm-sbrel"},
5102 : {MO_DLLIMPORT, "arm-dllimport"},
5103 : {MO_SECREL, "arm-secrel"},
5104 : {MO_NONLAZY, "arm-nonlazy"}};
5105 16 : return makeArrayRef(TargetFlags);
5106 : }
|